diff --git a/docs/conf.py b/docs/conf.py index 051b43b319..b6bc97c4e4 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -199,6 +199,7 @@ .. _cmake: https://cmake.org .. _scikit-build-core: https://scikit-build-core.readthedocs.io/en/latest/ .. _netcdf4-py: https://github.com/Unidata/netcdf4-python +.. _pyyaml: https://pyyaml.org/wiki/PyYAMLDocumentation .. _numpy: https://www.numpy.org/ .. _scipy: https://docs.scipy.org/doc/scipy .. _pyparsing: https://github.com/pyparsing/pyparsing diff --git a/docs/quickstart/install.rst b/docs/quickstart/install.rst index e935c87095..27b1351bae 100644 --- a/docs/quickstart/install.rst +++ b/docs/quickstart/install.rst @@ -75,6 +75,7 @@ to run sisl, so generally one shouldn't worry about getting correct packages etc. Here the more detailed requirements are listed. - `Python`_ 3.9 or above +- `pyyaml`_ - `numpy`_ - `scipy`_ - `xarray`_ diff --git a/docs/toolbox/basis/basis.rst b/docs/toolbox/basis/basis.rst new file mode 100644 index 0000000000..eb03064961 --- /dev/null +++ b/docs/toolbox/basis/basis.rst @@ -0,0 +1,93 @@ +Basis optimization +================== + +.. currentmodule:: sisl_toolbox.siesta.minimizer + +Optimizing a basis set for SIESTA is a cumbersome task. +The goal of this toolbox is to allow users to **optimize a basis set with just one CLI command.** + +The commands and their options can be accessed like: + +.. code-block:: bash + + stoolbox basis --help + +In summary, whenever one wants to optimize a basis set for a given system, +the first step is to create a directory with the input files to run the +calculation. This directory should contain, as usual: + +- The ``.fdf`` files with all the input parameters for the calculation. +- The pseudopotentials (``.psf`` or ``.psml`` files). + +Then, one can directly run the optimization: + +.. code-block:: bash + + stoolbox basis optimize --geometry input.fdf + +with ``input.fdf`` being the input file for the calculation. This will use all the default +values. Since there are many possible tweaks, we invite you to read carefully the help +message from the CLI. Here we will just mention some important things that could go unnoticed. + +**Basis enthalpy:** The quantity that is minimized is the basis enthalpy. This is :math:`H = E + pV` +with :math:`E` being the energy of the system, :math:`V` the volume of the basis and :math:`p` a "pressure" that +is defined in the fdf file with the ```BasisPressure`` table. This "pressure" penalizes bigger +basis, which result in more expensive calculations. It is the responsibility of the user to +set this value. As a rule of thumb, we recommend to set it to ``0.02 GPa`` for the first two +rows of the periodic table and ``0.2 GPa`` for the rest. + +**The SIESTA command:** There is a ``--siesta-cmd`` option to specify the way of executing SIESTA. By default, it +is simply calling the ``siesta`` command, but you could set it for example to ``mpirun -n 4 siesta`` +so that SIESTA is ran in parallel. + +There is no problem with using this CLI in clusters inside a submitted job, for example. + +**A custom basis definition:** It may happen that the conventional optimizable parameters as well as their lower and +upper bounds are not good for your case (e.g. you would like the upper bound for a cutoff +radius to be higher). In that case, you can create a custom ``--basis-spec``. The best way +to do it is by calling + +.. code-block:: bash + + stoolbox basis build --geometry input.fdf + +which will generate a yaml file with a basis specification that you can tweak manually. +Then, you can pass it directly to the optimization using the ``--config`` option: + +.. code-block:: bash + + stoolbox basis optimize --geometry input.fdf --config my_config.yaml + +**Installing the optimizers:** The default optimizer is BADS (https://github.com/acerbilab/bads) +which is the one that we have found works best to optimize basis sets. The optimizer is however +not installed by default. You can install it using pip: + +.. code-block:: bash + + pip install pybads + +and the same would apply for other optimizers that you may want to use. + +**Output:** The output that appears on the terminal is left to the particular optimizer. +However, sisl generates ``.dat`` files which contain information about each SIESTA execution. +These files contain one column for each variable being optimized and one column for the +metric to minimize. + + +Python API +---------- + +The functions that do the work are also usable in python code by importing them: + +.. code-block:: python + + from sisl_toolbox.siesta.minimizer import optimize_basis, write_basis_to_yaml + +Here is their documentation: + +.. autosummary:: + :toctree: generated/ + :nosignatures: + + optimize_basis + write_basis_to_yaml diff --git a/docs/toolbox/index.rst b/docs/toolbox/index.rst index e3f225dbfb..7221951736 100644 --- a/docs/toolbox/index.rst +++ b/docs/toolbox/index.rst @@ -22,10 +22,12 @@ Toolboxes should be imported directly. The implemented toolboxes are listed here: - + .. toctree:: :maxdepth: 1 transiesta/ts_fft siesta/atom_plot btd/btd + siesta/minimizer + basis/basis diff --git a/docs/toolbox/siesta/minimizer.rst b/docs/toolbox/siesta/minimizer.rst new file mode 100644 index 0000000000..aebdec4395 --- /dev/null +++ b/docs/toolbox/siesta/minimizer.rst @@ -0,0 +1,33 @@ +Minimizers +================= + +.. currentmodule:: sisl_toolbox.siesta.minimizer + +In `sisl_toolbox.siesta.minimizer` there is a collection of minimizers +that given some `variables`, a `runner` and a `metric` optimizes the +variables to minimize the metric. + +These are the minimizer classes implemented: + +.. autosummary:: + :toctree: generated/ + :nosignatures: + + BaseMinimize + LocalMinimize + DualAnnealingMinimize + BADSMinimize + ParticleSwarmsMinimize + +For each of them, there is a subclass particularly tailored to optimize +SIESTA runs: + +.. autosummary:: + :toctree: generated/ + :nosignatures: + + MinimizeSiesta + LocalMinimizeSiesta + DualAnnealingMinimizeSiesta + BADSMinimizeSiesta + ParticleSwarmsMinimizeSiesta diff --git a/pyproject.toml b/pyproject.toml index eea2e1c801..ed05485487 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,6 +51,7 @@ keywords = [ # Now all dependencies that were released around the same time should be the # lower bound of dependencies. dependencies = [ + "pyyaml", # We need npt.NDArray "numpy>=1.21", "scipy>=1.6", diff --git a/src/sisl/_core/periodictable.py b/src/sisl/_core/periodictable.py index 1aa7b1ef27..a14326d19b 100644 --- a/src/sisl/_core/periodictable.py +++ b/src/sisl/_core/periodictable.py @@ -900,7 +900,7 @@ def conv(row, col): return "" if row.ndim == 0: - return conv(row[()], col[()]) + return np.asarray(conv(row, col), dtype=str) return np.asarray(list(map(conv, row, col)), dtype=str) @classmethod @@ -939,7 +939,7 @@ def conv(Z): return -1 if Z.ndim == 0: - return conv(Z[()]) + return np.asarray(conv(Z[()]), dtype=int) return np.asarray(list(map(conv, Z)), dtype=int) @classmethod diff --git a/src/sisl/_lib/_argparse.py b/src/sisl/_lib/_argparse.py index 110efed924..72aa6b6388 100644 --- a/src/sisl/_lib/_argparse.py +++ b/src/sisl/_lib/_argparse.py @@ -1,11 +1,230 @@ # This Source Code Form is subject to the terms of the Mozilla Public # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at https://mozilla.org/MPL/2.0/. + +import argparse +import inspect +import typing +from typing import Any, Callable, Literal, Optional, Union + +from sisl._lib._docscrape import FunctionDoc + try: - from rich_argparse import RichHelpFormatter + from rich_argparse import RawTextRichHelpFormatter - SislHelpFormatter = RichHelpFormatter + SislHelpFormatter = RawTextRichHelpFormatter except ImportError: - import argparse - SislHelpFormatter = argparse.RawDescriptionHelpFormatter + + +def is_optional(field): + """Check whether the annotation for a parameter is an Optional type.""" + return is_union(field) and type(None) in typing.get_args(field) + + +def is_union(field): + """Check whether the annotation for a parameter is an Union type.""" + return typing.get_origin(field) is Union + + +def is_literal(field): + """Check whether the annotation for a parameter is a Literal type.""" + return typing.get_origin(field) is Literal + + +def get_optional_arg(field): + """Get the type of an optional argument from the typehint. + + It only works if the annotation only has one type. + + E.g.: Optional[int] -> int + E.g.: Optional[Union[int, str]] -> raises ValueError + """ + if not is_optional(field): + return field + + args = typing.get_args(field) + if len(args) > 2: + raise ValueError("Optional type must have at most 2 arguments") + for arg in args: + if arg is not type(None): + return arg + + raise ValueError("No non-None type found in Union") + + +def get_literal_args(field): + """Get the values of a literal. + + E.g.: Literal[1, 2, 3] -> (1, 2, 3) + """ + return typing.get_args(field) + + +class NotPassedArg: + """Placeholder to use for arguments that have not been passed. + + By setting this as the default value for an argument, we can + later check if the argument was passed through the CLI or not. + """ + + def __init__(self, val): + self.val = val + + def __repr__(self): + return repr(self.val) + + def __str__(self): + return str(self.val) + + def __eq__(self, other): + return self.val == other + + def __getattr__(self, name): + return getattr(self.val, name) + + +def get_runner(func): + """Wraps a function to receive the args parsed from argparse""" + + def _runner(args): + + # Get the config argument. If present, load the arguments + # from the config (yaml) file. + config = getattr(args, "config", None) + config_args = {} + if config is not None: + import yaml + + with open(args.config, "r") as f: + config_args = yaml.safe_load(f) + + # Build the final arguments dictionary, using the arguments of the + # config file as defaults. + final_args = {} + for k, v in vars(args).items(): + if k in ("runner", "config"): + continue + elif isinstance(v, NotPassedArg): + final_args[k] = config_args.get(k, v.val) + else: + final_args[k] = v + + # And call the function + return func(**final_args) + + return _runner + + +def get_argparse_parser( + func: Callable, + name: Optional[str] = None, + subp=None, + parser_kwargs: dict[str, Any] = {}, + arg_aliases: dict[str, str] = {}, + defaults: dict[str, Any] = {}, + add_config: bool = True, +) -> argparse.ArgumentParser: + """Creates an argument parser from a function's signature and docstring. + + The created argument parser just mimics the function. It is a CLI version + of the function. + + Parameters + ---------- + func : + The function to create the parser for. + name : + The name of the parser. If None, the function's name is used. + subp : + The subparser to add the parser to. If None, a new isolated + parser is created. + parser_kwargs : + Additional arguments to pass to the parser. + arg_aliases : + Dictionary holding aliases (shortcuts) for the arguments. The keys + of this dictionary are the argument names, and the values are the + aliases. For example, if the function has an argument called + `--size`, and you want to add a shortcut `-s`, you can pass + `arg_aliases={"size": "s"}`. + defaults : + Dictionary holding default values for the arguments. The keys + of this dictionary are the argument names, and the values are + the default values. The defaults are taken from the function's + signature if not specified. + add_config : + If True, adds a `--config` argument to the parser. This + argument accepts a path to a YAML file containing the + arguments for the function. + """ + + # Check if the function needs to be added as a subparser + is_sub = not subp is None + + # Get the function's information + fdoc = FunctionDoc(func) + signature = inspect.signature(func) + + # Initialize parser + title = "".join(fdoc["Summary"]) + parser_help = "\n".join(fdoc["Extended Summary"]) + if is_sub: + p = subp.add_parser( + name or func.__name__.replace("_", "-"), + description=parser_help, + help=title, + **parser_kwargs, + ) + else: + p = argparse.ArgumentParser(title, **parser_kwargs) + + # Add the config argument to load the arguments from a YAML file + if add_config: + p.add_argument( + "--config", + "-c", + type=str, + default=None, + help="Path to a YAML file containing the arguments for the command", + ) + + group = p.add_argument_group("Function arguments") + + # Add all the function's parameters to the parser + parameters_help = {p.name: p.desc for p in fdoc["Parameters"]} + for param in signature.parameters.values(): + + arg_names = [f"--{param.name.replace('_', '-')}"] + if param.name in arg_aliases: + arg_names.append(f"-{arg_aliases[param.name]}") + + annotation = param.annotation + if is_optional(annotation): + annotation = get_optional_arg(annotation) + + choices = None + if is_literal(annotation): + choices = get_literal_args(annotation) + annotation = type(choices[0]) + + # If the annotation is some-kind of union, just replace + # it with a string... + if is_union(annotation): + annotation = str + + group.add_argument( + *arg_names, + type=annotation, + default=NotPassedArg(param.default), + choices=choices, + action=argparse.BooleanOptionalAction if annotation is bool else None, + required=param.default is inspect._empty, + help="\n".join(parameters_help.get(param.name, [])), + ) + + if is_sub: + defaults = {"runner": get_runner(func), **defaults} + + p.set_defaults(**defaults) + + return p diff --git a/src/sisl_toolbox/cli/_cli.py b/src/sisl_toolbox/cli/_cli.py index a6984cf39a..e84c9da8da 100644 --- a/src/sisl_toolbox/cli/_cli.py +++ b/src/sisl_toolbox/cli/_cli.py @@ -44,8 +44,7 @@ def register(self, setup): """ self._cmds.append(setup) - def __call__(self, argv=None): - + def get_parser(self): # Create command-line cmd = Path(sys.argv[0]) p = argparse.ArgumentParser( @@ -70,6 +69,12 @@ def __call__(self, argv=None): for cmd in self._cmds: cmd(subp, parser_kwargs=dict(formatter_class=p.formatter_class)) + return p + + def __call__(self, argv=None): + + p = self.get_parser() + args = p.parse_args(argv) args.runner(args) diff --git a/src/sisl_toolbox/cli/_cli_imports.py b/src/sisl_toolbox/cli/_cli_imports.py index cb6df8f9b6..3276034145 100644 --- a/src/sisl_toolbox/cli/_cli_imports.py +++ b/src/sisl_toolbox/cli/_cli_imports.py @@ -8,5 +8,6 @@ __all__ = [] -import sisl_toolbox.siesta.atom # noqa: F401 +import sisl_toolbox.siesta.atom._register_cli # noqa: F401 +import sisl_toolbox.siesta.minimizer._register_cli # noqa: F401 import sisl_toolbox.transiesta.poisson # noqa: F401 diff --git a/src/sisl_toolbox/siesta/atom/_atom.py b/src/sisl_toolbox/siesta/atom/_atom.py index 807a32b4be..828f3912a6 100644 --- a/src/sisl_toolbox/siesta/atom/_atom.py +++ b/src/sisl_toolbox/siesta/atom/_atom.py @@ -42,9 +42,6 @@ __all__ = ["AtomInput", "atom_plot_cli"] - -_script = Path(sys.argv[0]).name - # We don't use the Siesta units here... _Ang2Bohr = si.units.convert("Ang", "Bohr") @@ -803,57 +800,6 @@ def next_rc(ir, ic, nrows, ncols): return fig, axs -def atom_plot_cli(subp=None, parser_kwargs={}): - """Run plotting command for the output of atom""" - - is_sub = not subp is None - - title = "Plotting facility for atom output (run in the atom output directory)" - if is_sub: - global _script - _script = f"{_script} atom-plot" - p = subp.add_parser("atom-plot", description=title, help=title, **parser_kwargs) - else: - import argparse - - p = argparse.ArgumentParser(title, **parser_kwargs) - - p.add_argument( - "--plot", - "-P", - action="append", - type=str, - choices=("wavefunction", "charge", "log", "potential"), - help="""Determine what to plot""", - ) - - p.add_argument("-l", default="spdf", type=str, help="""Which l shells to plot""") - - p.add_argument("--save", "-S", default=None, help="""Save output plots to file.""") - - p.add_argument( - "--show", - default=False, - action="store_true", - help="""Force showing the plot (only if --save is specified)""", - ) - - p.add_argument( - "input", type=str, default="INP", help="""Input file name (default INP)""" - ) - - if is_sub: - p.set_defaults(runner=atom_plot) - else: - atom_plot(p.parse_args()) - - -# Import object holding all the CLI -from sisl_toolbox.cli import register_toolbox_cli - -register_toolbox_cli(atom_plot_cli) - - @set_module("sisl_toolbox.siesta.atom") def atom_plot(args): import matplotlib.pyplot as plt diff --git a/src/sisl_toolbox/siesta/atom/_register_cli.py b/src/sisl_toolbox/siesta/atom/_register_cli.py new file mode 100644 index 0000000000..786dfbf402 --- /dev/null +++ b/src/sisl_toolbox/siesta/atom/_register_cli.py @@ -0,0 +1,61 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# Import object holding all the CLI +import sys +from pathlib import Path + +from sisl_toolbox.cli import register_toolbox_cli + +from ._atom import atom_plot + +# TODO once 3.10 hits, use `sys.orig_argv[0]` which is non-writeable +_script = Path(sys.argv[0]).name + + +def atom_plot_cli(subp=None, parser_kwargs={}): + """Run plotting command for the output of atom""" + + is_sub = not subp is None + + title = "Plotting facility for atom output (run in the atom output directory)" + if is_sub: + global _script + _script = f"{_script} atom-plot" + p = subp.add_parser("atom-plot", description=title, help=title, **parser_kwargs) + else: + import argparse + + p = argparse.ArgumentParser(title, **parser_kwargs) + + p.add_argument( + "--plot", + "-P", + action="append", + type=str, + choices=("wavefunction", "charge", "log", "potential"), + help="""Determine what to plot""", + ) + + p.add_argument("-l", default="spdf", type=str, help="""Which l shells to plot""") + + p.add_argument("--save", "-S", default=None, help="""Save output plots to file.""") + + p.add_argument( + "--show", + default=False, + action="store_true", + help="""Force showing the plot (only if --save is specified)""", + ) + + p.add_argument( + "input", type=str, default="INP", help="""Input file name (default INP)""" + ) + + if is_sub: + p.set_defaults(runner=atom_plot) + else: + atom_plot(p.parse_args()) + + +register_toolbox_cli(atom_plot_cli) diff --git a/src/sisl_toolbox/siesta/minimizer/__init__.py b/src/sisl_toolbox/siesta/minimizer/__init__.py index ee7b1c1e81..adf4b96751 100644 --- a/src/sisl_toolbox/siesta/minimizer/__init__.py +++ b/src/sisl_toolbox/siesta/minimizer/__init__.py @@ -103,3 +103,4 @@ from ._minimize_siesta import * from ._runner import * from ._variable import * +from .basis_optimization import * diff --git a/src/sisl_toolbox/siesta/minimizer/_atom_basis.py b/src/sisl_toolbox/siesta/minimizer/_atom_basis.py index 9125cc8a45..ec33065f29 100644 --- a/src/sisl_toolbox/siesta/minimizer/_atom_basis.py +++ b/src/sisl_toolbox/siesta/minimizer/_atom_basis.py @@ -8,6 +8,7 @@ import sisl as si from sisl._internal import set_module +from sisl.messages import info from sisl.utils import NotNonePropertyDict from ._variable import Variable @@ -323,6 +324,7 @@ def basis(self): # get options for this shell opts = self.opts[(n, l)] + line = f"n={n} {l} {len(orbs)}" for key, value in opts.items(): if key == "pol": @@ -342,6 +344,8 @@ def basis(self): # explicit radius, convert to Bohr ri *= _Ang2Bohr line += f" {ri:.10f}" + else: + info("Omitting soft-confinement due to V0 < 0.001 eV") elif key == "split": # split-norm value @@ -363,6 +367,8 @@ def basis(self): line += f" {yukawa/_Ang2Bohr:.10f}" if not width is None: line += f" {width*_Ang2Bohr:.10f}" + else: + info("Omitting charge-confinement due to Z < 0.005 e") else: raise ValueError(f"Unknown option for n={n} l={l}: {key}") block.append(line) diff --git a/src/sisl_toolbox/siesta/minimizer/_metric_siesta.py b/src/sisl_toolbox/siesta/minimizer/_metric_siesta.py index eb4c67c9de..b34c3207af 100644 --- a/src/sisl_toolbox/siesta/minimizer/_metric_siesta.py +++ b/src/sisl_toolbox/siesta/minimizer/_metric_siesta.py @@ -31,12 +31,13 @@ def _siesta_out_accept(out): if not isinstance(out, io_siesta.stdoutSileSiesta): out = io_siesta.stdoutSileSiesta(out) - accept = out.completed() + accept = out.info.completed if accept: with out: # We do not accept: # KBproj: WARNING: KB projector does not decay to zero accept = not out.step_to("KB projector does not decay to zero")[0] + if accept: for l in (0, 1, 2): with out: diff --git a/src/sisl_toolbox/siesta/minimizer/_minimize.py b/src/sisl_toolbox/siesta/minimizer/_minimize.py index 5ddcf89e79..aacf913d08 100644 --- a/src/sisl_toolbox/siesta/minimizer/_minimize.py +++ b/src/sisl_toolbox/siesta/minimizer/_minimize.py @@ -9,6 +9,7 @@ from hashlib import sha256 from numbers import Real from pathlib import Path +from typing import Callable import numpy as np from scipy.optimize import dual_annealing, minimize @@ -22,11 +23,16 @@ "BaseMinimize", "LocalMinimize", "DualAnnealingMinimize", + "BADSMinimize", "MinimizeToDispatcher", + "ParticleSwarmsMinimize", ] _log = logging.getLogger(__name__) +# TODO we should have some kind of checks for the bounds, whether they +# make sense or not. + def _convert_optimize_result(minimizer, result): """Convert optimize result to conform to the scaling procedure performed""" @@ -54,12 +60,15 @@ class BaseMinimize: # Basic minimizer basically used for figuring out whether # to use a local or global minimization strategy - def __init__(self, variables=(), out="minimize.dat", norm="identity"): + def __init__( + self, variables=(), out="minimize.dat", norm="identity", out_fmt="20.17e" + ): # ensure we have an ordered dict, for one reason or the other self.variables = [] if variables is not None: for v in variables: self.add_variable(v) + self.out_fmt = out_fmt self.reset(out, norm) def reset(self, out=None, norm=None): @@ -221,7 +230,9 @@ def __enter__(self): comment += f" The first {len(self.data)} lines contains prior content." data = np.column_stack((self.data.x, self.data.y)) self._fh = tableSile(self.out, "w").__enter__() - self._fh.write_data(data.T, comment=comment, header=header, fmt="20.17e") + self._fh.write_data( + data.T, comment=comment, header=header, fmt=self.out_fmt + ) self._fh.flush() return self @@ -288,15 +299,18 @@ def _minimize_func(self, norm_variables, *args): pass # Else we have to call minimize - metric = np.array(self(variables, *args)) - # add the data to the output file and hash it - self._fh.write_data( - variables.reshape(-1, 1), metric.reshape(-1, 1), fmt="20.17e" - ) - self._fh.flush() - self.data.x.append(variables) - self.data.y.append(metric) - self.data.hash.append(current_hash) + metric = self(variables, *args) + + if metric is not None: + metric = np.array(metric) + # add the data to the output file and hash it + self._fh.write_data( + variables.reshape(-1, 1), metric.reshape(-1, 1), fmt=self.out_fmt + ) + self._fh.flush() + self.data.x.append(variables) + self.data.y.append(metric) + self.data.hash.append(current_hash) return metric @@ -332,6 +346,75 @@ def run(self, *args, **kwargs): return _convert_optimize_result(self, opt) +@set_module("sisl_toolbox.siesta.minimizer") +class BADSMinimize(BaseMinimize): + """Bayesian Adaptive Direct Search (BADS) minimizer. + + It uses the pybads package to perform the minimization. + """ + + def run(self, options={}, func_hard_bounds: Callable = lambda x: x, **kwargs): + from pybads.bads import BADS + + bounds = self.normalize_bounds() + bounds = np.array(bounds) + + hard_bounds = func_hard_bounds(bounds) + + with self: + + bads = BADS( + fun=self._minimize_func, + lower_bounds=hard_bounds[:, 0], + upper_bounds=hard_bounds[:, 1], + plausible_lower_bounds=bounds[:, 0], + plausible_upper_bounds=bounds[:, 1], + options=options, + ) + + # They do a deep copy of the arguments at the end, when generating the result. + # This might be a problem if the arguments are not pickleable. E.g. the minimizer function + # is associated with the minimizer object, which might contain siles. + try: + opt = bads.optimize() + except TypeError: + pass + + # return _convert_optimize_result(self, opt) + + +@set_module("sisl_toolbox.siesta.minimizer") +class ParticleSwarmsMinimize(BaseMinimize): + """Particle swarms optimizer. + + It uses the pyswarms package to perform the minimization.""" + + def run(self, options: dict = {}, n_particles: int = 4, n_calls=100, **kwargs): + from pyswarms.single.global_best import GlobalBestPSO + + bounds = self.normalize_bounds() + bounds = np.array(bounds).T + + options = {"c1": 0.5, "c2": 0.3, "w": 0.9, **options} + + # We will receive as many inputs as there are particles in the algorithm. + # Run them sequentially for now, although this could be parallelized. + def func(points): + return [self._minimize_func(point) for point in points] + + with self: + optimizer = GlobalBestPSO( + n_particles=n_particles, + dimensions=len(self.variables), + options=options, + bounds=bounds, + ) + + optimizer.optimize(func, iters=n_calls) + + # return _convert_optimize_result(self, opt) + + @set_module("sisl_toolbox.siesta.minimizer") class MinimizeToDispatcher(AbstractDispatch): """Base dispatcher from class passing from Minimize class""" diff --git a/src/sisl_toolbox/siesta/minimizer/_minimize_siesta.py b/src/sisl_toolbox/siesta/minimizer/_minimize_siesta.py index b0de834e07..fc830a9b73 100644 --- a/src/sisl_toolbox/siesta/minimizer/_minimize_siesta.py +++ b/src/sisl_toolbox/siesta/minimizer/_minimize_siesta.py @@ -15,7 +15,13 @@ from ._minimize import * from ._runner import AndRunner -__all__ = ["MinimizeSiesta", "LocalMinimizeSiesta", "DualAnnealingMinimizeSiesta"] +__all__ = [ + "MinimizeSiesta", + "LocalMinimizeSiesta", + "DualAnnealingMinimizeSiesta", + "BADSMinimizeSiesta", + "ParticleSwarmsMinimizeSiesta", +] _log = logging.getLogger(__name__) @@ -222,3 +228,13 @@ class LocalMinimizeSiesta(LocalMinimize, MinimizeSiesta): @set_module("sisl_toolbox.siesta.minimizer") class DualAnnealingMinimizeSiesta(DualAnnealingMinimize, MinimizeSiesta): pass + + +@set_module("sisl_toolbox.siesta.minimizer") +class BADSMinimizeSiesta(BADSMinimize, MinimizeSiesta): + pass + + +@set_module("sisl_toolbox.siesta.minimizer") +class ParticleSwarmsMinimizeSiesta(ParticleSwarmsMinimize, MinimizeSiesta): + pass diff --git a/src/sisl_toolbox/siesta/minimizer/_register_cli.py b/src/sisl_toolbox/siesta/minimizer/_register_cli.py new file mode 100644 index 0000000000..c749cfb1ab --- /dev/null +++ b/src/sisl_toolbox/siesta/minimizer/_register_cli.py @@ -0,0 +1,40 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +import argparse + +from sisl._lib._argparse import get_argparse_parser + +# Import object holding all the CLI +from sisl_toolbox.cli import register_toolbox_cli + +from .basis_optimization import optimize_basis, write_basis_to_yaml + + +def basis_cli(subp=None, parser_kwargs={}): + """Argparse CLI for the basis optimization utilities.""" + + # Create main parser + title = "Basis optimization utilities" + is_sub = not subp is None + if is_sub: + p = subp.add_parser("basis", description=title, help=title, **parser_kwargs) + else: + p = argparse.ArgumentParser(title, description=title, **parser_kwargs) + + # If the main parser is executed, just print the help + p.set_defaults(runner=lambda args: p.print_help()) + + # Add the subparsers for the commands + subp = p.add_subparsers(title="Commands") + + get_argparse_parser( + write_basis_to_yaml, name="build", subp=subp, parser_kwargs=parser_kwargs + ) + parser_kwargs["aliases"] = ("optim",) + get_argparse_parser( + optimize_basis, name="optimize", subp=subp, parser_kwargs=parser_kwargs + ) + + +register_toolbox_cli(basis_cli) diff --git a/src/sisl_toolbox/siesta/minimizer/_runner.py b/src/sisl_toolbox/siesta/minimizer/_runner.py index e07b3ed1ef..ffd3ba777f 100644 --- a/src/sisl_toolbox/siesta/minimizer/_runner.py +++ b/src/sisl_toolbox/siesta/minimizer/_runner.py @@ -207,7 +207,7 @@ def __init__( abs_cmd = path_abs(cmd, self.path) if abs_cmd.is_file(): self.cmd = [abs_cmd] - if not os.access(self.cmd, os.X_OK): + if not os.access(abs_cmd, os.X_OK): raise ValueError( f"{self.__class__.__name__} shell script {self.cmd.relative_to(self.path.cwd())} not executable" ) diff --git a/src/sisl_toolbox/siesta/minimizer/_variable.py b/src/sisl_toolbox/siesta/minimizer/_variable.py index 771a822a23..0a78c2369d 100644 --- a/src/sisl_toolbox/siesta/minimizer/_variable.py +++ b/src/sisl_toolbox/siesta/minimizer/_variable.py @@ -34,7 +34,7 @@ def __init__(self, name, value, **attrs): self.attrs = attrs def __getattr__(self, key): - if key in self.attrs: + if key != "attrs" and key in self.attrs: return self.attrs[key] raise AttributeError(f"could not find attribute {key}") diff --git a/src/sisl_toolbox/siesta/minimizer/basis_optimization.py b/src/sisl_toolbox/siesta/minimizer/basis_optimization.py new file mode 100644 index 0000000000..eeb1b34eb2 --- /dev/null +++ b/src/sisl_toolbox/siesta/minimizer/basis_optimization.py @@ -0,0 +1,1148 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +import logging +import os +import subprocess +from pathlib import Path +from typing import Dict, List, Literal, Optional, Tuple, Union + +import numpy as np +import yaml as yaml_module + +import sisl +from sisl._core.periodictable import PeriodicTable +from sisl._help import has_module +from sisl.messages import warn +from sisl_toolbox.siesta.minimizer import ( + AtomBasis, + BADSMinimizeSiesta, + EnergyMetric, + FunctionRunner, + LocalMinimizeSiesta, + ParticleSwarmsMinimizeSiesta, + SiestaRunner, +) +from sisl_toolbox.siesta.minimizer._minimize import _log + +__all__ = [ + "RhoReuseSiestaRunner", + "write_basis_to_yaml", + "optimize_basis", +] + + +class RhoReuseSiestaRunner(SiestaRunner): + + def run(self): + pipe = "" + stdout, stderr = self._get_standard() + for pre, f in [(">", stdout), ("2>", stderr)]: + try: + pipe += f"{pre} {f.name}" + except Exception: + pass + cmd = [str(cmd) for cmd in self.cmd + ["<", self.fdf]] + _log.debug(f"running Siesta using command[{self.path}]: {' '.join(cmd)} {pipe}") + # Remove stuff to ensure that we don't read information from prior calculations + self.clean( + "*.ion*", "fdf.*.log", "INPUT_TMP.[0-9][0-9][0-9][0-9][0-9]" + ) # f"{self.systemlabel}.*[!fdf]" + + if (self.path / "Rho.grid.nc").exists(): + if (self.path / "Rho.IN.grid.nc").exists(): + (self.path / "Rho.IN.grid.nc").unlink() + + Path(self.path / "Rho.IN.grid.nc").symlink_to(self.path / "Rho.grid.nc") + + using_rho = False + with open(self.fdf, "r") as f: + for line in f.readlines(): + if "SCF.Read.Charge.NetCDF t" in line: + using_rho = True + break + + if not using_rho: + with open(self.fdf, "a") as f: + f.write("SCF.Read.Charge.NetCDF t\n") + + return self.hook( + subprocess.run( + cmd[:-2], + cwd=self.path, + encoding="utf-8", + stdin=open(self.fdf, "r"), + stdout=stdout, + stderr=stderr, + check=False, + ) + ) + + +# Helper functions +def get_row(Z: int) -> Tuple[int, str]: + """Get the row of the periodic table for a given atomic number. + + If the atomic number is not in the periodic table, a ValueError is raised. + + Parameters + ---------- + Z : int + atomic number + + Returns + ------- + int + row of the periodic table where the atomic number is located. First + row is index 0. + str + block of the element (s, p, d, f, g). + """ + Z = abs(Z) + + if 0 < Z <= 2: + return 0, "s" + elif Z <= 18: + rel_Z = Z - 2 + prev_rows = 1 + + if rel_Z < 2: + block = "s" + else: + block = "p" + + return ((rel_Z - 1) // 8) + prev_rows, block + elif Z <= 54: + rel_Z = Z - 18 + prev_rows = 3 + + if rel_Z < 2: + block = "s" + elif rel_Z < 12: + block = "d" + else: + block = "p" + + return ((rel_Z - 1) // 18) + prev_rows, block + elif Z <= 118: + rel_Z = Z - 54 + prev_rows = 5 + + if rel_Z < 2: + block = "s" + elif rel_Z < 17: + block = "f" + elif rel_Z < 26: + block = "d" + else: + block = "p" + + return ((rel_Z - 1) // 32) + prev_rows, block + else: + raise ValueError(f"No such atomic number in the periodic table: {Z}") + + +def get_valence_orbs_psml(psml: str): + """Get valence orbitals from psml file. + + Parameters + ---------- + psml : str + Path to the psml file. + """ + import xml.etree.ElementTree as ET + + # Get the root of the xml tree + tree = ET.parse(psml) + root = tree.getroot() + + # Iterate recursively over childs until we find the valence-configuration tag, + # which contains the shells. + shells = [] + for child in root.iter(): + if child.tag.endswith("valence-configuration"): + + for shell in child: + shells.append(shell.get("n") + shell.get("l")) + break + + return shells + + +def get_valence_orbs_psf(psf: Union[str, Path]): + """Get valence orbitals from psf file header. + + Parameters + ---------- + psf : Union[str, Path] + Path to the psf file. + """ + with open(psf, "r") as f: + + for _ in range(3): + line = f.readline() + + shells = [ + shell_spec.split(" ")[0] + for shell_spec in line.strip().split("/") + if len(shell_spec) > 0 + ] + + return shells + + +BoolOrTupleBool = Union[bool, Tuple[bool]] + + +def generate_basis_config( + atoms: List[sisl.Atom], + basis_size: str = "atoms", + optimize_charge_conf: BoolOrTupleBool = False, + optimize_soft_conf: BoolOrTupleBool = False, + initial_atoms_basis: bool = False, + optimize_atoms_basis: bool = False, + cutoff_upper_bound: float = 5.5, + variable_delta: float = 0.05, +): + r"""Generates a basis configuration file to be fed to the optimizer. + + Non-polarization orbitals are set to optimize their cutoff radii, + while polarization orbitals are set to be optimized through the charge + confinement parameters. + + The function looks at possible pseudopotential files (first psml, then psf) to + determine which orbitals to include in the basis. Otherwise, it just generates + the valence orbitals for the usual conventions, without semicores. + + Parameters + ---------- + atoms : sisl.Atoms + Atoms object to generate the basis configuration for. + basis_size : str + Specification of basis size. "XZ(P)" where X is the number of zetas + and P is whether to polarize or not. + + X can also be S, D, T, Q for 1, 2, 3, 4 zetas respectively. E.g. "DZ". + + It can also be "atoms", which exactly takes the orbitals present in the atoms object. + optimize_charge_conf : bool, tuple of bool, optional + Whether some orbitals will be optimized for charge-confinement. + If true, it will optimize, the charge (Q), the Yukawa screening parameter + :math:`\lambda`, and the singularity regularization parameter :math:`\delta`. + + For multiple arguments, each can be fine-tuned on/off. + + This will only optimize charge-confinement for *empty* orbitals (:math:`q=0`). + optimize_soft_conf : bool, tuple of bool, optional + Whether all orbitals will be optimized for soft-confinement. + If true, it will optimize, the potential (:math:`V_0`), and the inner + radius :math:`r_i`. + + For multiple arguments, each can be fine-tuned on/off. + initial_atoms_basis: bool, optional + If True, the orbitals that are in the Atoms object are taken as the initial values for basis + optimization. + optimize_atoms_basis: bool, optional + If ``initial_atom_basis`` is True, whether to optimize the orbitals read from the basis or not. + cutoff_upper_bound : float, optional + Upper bound for the cutoff radii. + variable_delta : float, optional + Delta that specifies the step size in changes to the optimized variables. + It is used differently depending on the optimizer. + """ + # Parse the basis size + if basis_size != "atoms": + try: + n_zetas = int(basis_size[0]) + except: + n_zetas = { + "S": 1, + "D": 2, + "T": 3, + "Q": 4, + }[basis_size[0]] + + polarization = basis_size[-1] == "P" + + else: + n_zetas = None + polarization = False + + def _tuplilize(arg, n: int): + """Convert a string (comma-separated) to a list of entries""" + if isinstance(arg, str): + arg = [bool(o) for o in arg.split(",")] + + if isinstance(arg, bool): + arg = [arg] + + if len(arg) == 1: + arg = arg * n + + assert len(arg) == n + return arg + + optimize_charge_conf = _tuplilize(optimize_charge_conf, 3) + optimize_soft_conf = _tuplilize(optimize_soft_conf, 2) + + # Helper function to determine lower bounds for cutoff radii. + def get_cutoff_lowerbound(orb_name, i_zeta): + """Returns the lower bound for the cutoff radius for a given orbital and zeta. + + The lower bound depends on which orbital it is and for which zeta the cutoff is. + """ + if i_zeta == 1: + return 1 + elif i_zeta > 1: + return 0 + elif orb_name == "1s": + return 2 + else: + return 2.5 + + def _orbs_from_row_and_block(table_row, atom_block): + """To be used if the orbitals to optimize can't be inferred from the pseudopotential.""" + warn( + "The orbitals are pre-filled, but likely they are not correct. " + "Please check the basis before doing excessive optimizations!" + ) + if table_row == 0: + orbs = ["1s"] + elif table_row <= 2: + orbs = [f"{valence_n}s", f"{valence_n}p"] + elif table_row <= 4: + orbs = [f"{valence_n - 1}d", f"{valence_n}s"] + if atom_block != "d": + orbs.append(f"{valence_n}p") + else: + orbs = [f"{valence_n - 2}f", f"{valence_n - 1}d", f"{valence_n}s"] + if atom_block not in ("d", "f"): + orbs.append(f"{valence_n}p") + elif atom_block == "f": + warn( + "The orbitals of f-block atoms might not be correctly determined automatically. Please check that they are correct." + ) + + return orbs + + def _get_charge_conf(): + r"""Return a dict with charge-confinement optimizations. + + This is based on: + + .. math: + + V_Q(r) = \frac{Ze^{-\lambda r}}{\sqrt{r^2+\delta^2}} + + """ + charge_conf = { + "charge": { + "initial": 3.0, + "bounds": [1.0, 10.0], + "delta": variable_delta, + }, + "yukawa": { + "initial": 0.0, + "bounds": [0.0, 3.0], + "delta": 0.5, + }, + "width": { + "initial": 0.0053, + "bounds": [0.0, 0.5], + "delta": 0.05, + }, + } + for key, optimize in zip(("charge", "yukawa", "width"), optimize_charge_conf): + if not optimize: + del charge_conf[key] + return charge_conf + + def _get_soft_conf(): + r"""Return a dict with soft-confinement optimizations. + + This is based on: + + .. math: + + V(r) = V_0\frac{e^{-\frac{r_c - r_i}{r-r_i}}}{r_c-r} + + """ + soft_conf = { + "ri": { + # Negative numbers means it will be based on *rc* + "initial": -0.9, + "bounds": [-1.0, -0.5], + "delta": 0.08, + }, + "V0": { + "initial": 10.0, + "bounds": [0.0, 1000.0], + "delta": 50.0, + }, + } + for key, optimize in zip(("V0", "ri"), optimize_soft_conf): + if not optimize: + del soft_conf[key] + return soft_conf + + PT = PeriodicTable() + + config_dict = {} + + # Loop through atoms and generate a basis config for each, adding it to config_dict. + for atom in atoms: + table_row = PT.Z_row(abs(atom.Z)) + atom_block = PT.Z_block(abs(atom.Z)) + # TODO check these are equivalent + table_row, atom_block = get_row(atom.Z) + + valence_n = table_row + 1 + pol_orbs = [] + + # Find out which orbitals to include for this atom. + tag = atom.tag + if basis_size == "atoms": + orbs = ["dummy"] + elif (psml_path := Path(f"{tag}.psml")).exists(): + # From the psml pseudopotential + orbs = get_valence_orbs_psml(psml_path) + elif ( + psml_path := Path(os.environ.get("SIESTA_PS_PATH", "")) / f"{tag}.psml" + ).exists(): + # From the psml pseudopotential + orbs = get_valence_orbs_psml(psml_path) + else: + # If we didn't find any psml file, we just generate the valence orbitals using + # the row and block of the atom. + orbs = _orbs_from_row_and_block(table_row, atom_block) + + # And then look for psf pseudos to see if we need to add semicores. + # We can't directly infer valence orbitals from psf because the headers + # also contain the polarization orbitals. + psf_files = list(Path().glob(f"{tag}.psf")) + list( + Path().glob(f"{tag}.*.psf") + ) + + if len(psf_files) > 0: + psf_orbs = get_valence_orbs_psf(psf_files[0]) + + # The psf file contains also polarization orbitals, + # so we need to remove them. What we do actually is to check if there are + # lower shells than what we created and prepend them to the list of orbitals. + i = 0 + for i, psf_orb in enumerate(psf_orbs): + if psf_orb in orbs: + break + orbs = [*psf_orbs[:i], *orbs] + + orb_to_polarize = orbs[-1] + # Add polarization orbitals + if polarization: + n = int(orb_to_polarize[0]) + l = "spdfg".find(orb_to_polarize[1]) + + pol_l = l + 1 + pol_n = max(n, pol_l + 1) + + pol_orbs = [f"{pol_n}{'spdfg'[pol_l]}"] + + # We use the tag of the atom as the key for its basis entry. + element_config = config_dict[tag] = { + "element": tag, + } + + # Read in the initial basis if requested. + + # The provided basis contains the ranges of the orbitals from the `Atoms` + # object passed. + provided_basis = {} + if initial_atoms_basis or basis_size == "atoms": + + for orbital in atom.orbitals: + orb_name = f"{orbital.n}{orbital.l}" + zeta = orbital.zeta + + provided_basis[orb_name, zeta] = orbital.R + + # Add this orbital to the basis if we are requested to set up the + # basis from the atoms object. + if basis_size == "atoms": + if orb_name not in element_config: + element_config[orb_name] = {"basis": {}} + + basis = element_config[orb_name]["basis"] + + if optimize_atoms_basis: + basis[f"zeta{zeta}"].update( + { + "initial": orbital.R if orbital.R >= 0 else 3.0, + "bounds": [ + get_cutoff_lowerbound(orb_name, zeta), + cutoff_upper_bound, + ], + "delta": variable_delta, + } + ) + else: + basis[f"zeta{zeta}"] = orbital.R + + if abs(orbital.q0) < 1e-8 and any(optimize_charge_conf): + # charge-confinement is typically only suitable for + # 0-charge orbitals. + basis["charge-confinement"] = _get_charge_conf() + + if any(optimize_soft_conf): + # soft-confinement optimization + basis["soft-confinement"] = _get_soft_conf() + + if basis_size == "atoms": + # We have already set up the basis from the atoms. + continue + + # If we are here, it means that the size of the basis is not determined by the provided + # atoms object, that is, basis size is something like DZP. + + def get_cutoff_upper_bound(orb_name, zeta): + cutoff = provided_basis.get((orb_name, zeta), None) + if cutoff is None: + return cutoff_upper_bound + return cutoff * 0.95 + + # Loop through non-polarization orbitals and add them to the basis. + for orb_name in orbs: + orb_basis = {} + + if polarization: + if orb_name == orb_to_polarize: + orb_basis["polarization"] = 1 + + if any(optimize_soft_conf): + # soft-confinement optimization + orb_basis["soft-confinement"] = _get_soft_conf() + + for i_zeta in range(1, n_zetas + 1): + orb_id = (orb_name, i_zeta) + + orb_basis[f"zeta{i_zeta}"] = { + "initial": provided_basis.get(orb_id, 3.0), + } + + if orb_id not in provided_basis: + # Orbital not provided, we need to optimize it from scratch. + orb_basis[f"zeta{i_zeta}"].update( + { + "bounds": [ + get_cutoff_lowerbound(orb_name, i_zeta), + get_cutoff_upper_bound(orb_name, i_zeta), + ], + "delta": variable_delta, + } + ) + else: + # Orbital provided, we don't need to optimize it. + orb_basis[f"zeta{i_zeta}"] = provided_basis[orb_id] + + # In this case the orbitals are based on the valence configuration. + # I.e. all orbitals have charge. In this case it does not + # make sense to add charge-confinement optimizations. + # For this, only the polarization orbital would gain this. + + element_config[orb_name] = {"basis": orb_basis} + + # Loop through polarization orbitals and add them to the basis. + if polarization: + for orb_name in pol_orbs: + + pol_orb_basis = {} + + # Check if the polarization orbital is already in the provided basis. + if (orb_name, 1) in provided_basis: + # Orbital provided in the basis + zeta = 1 + while (orb_name, zeta) in provided_basis: + pol_orb_basis[f"zeta{zeta}"] = provided_basis[(orb_name, zeta)] + + zeta += 1 + + elif any(optimize_charge_conf): + # Orbital not provided in the basis, and we need to optimize it. + pol_orb_basis["polarizes"] = orb_to_polarize + + else: + continue + + if any(optimize_charge_conf): + pol_orb_basis["charge-confinement"] = _get_charge_conf() + if any(optimize_soft_conf): + orb_basis["soft-confinement"] = _get_soft_conf() + element_config[orb_name] = {"basis": pol_orb_basis} + + return config_dict + + +def set_minimizer_variables( + minimizer, + basis: Dict[str, AtomBasis], + basis_config: dict, + zeta: Union[int, None] = None, + polarization: bool = False, + optimized_variables: Union[dict, None] = None, +): + """Sets the variables that the minimizer must optimize. + + A basis will contain all variables, but they must be optimized in a certain order. + For each step, this function must be called to adapt the basis and set the variables + on the minimizer. + + Parameters + ---------- + minimizer : Minimizer + Minimizer where to add the variables. + basis : Dict[str, AtomBasis] + Basis dictionary, which will be modified in-place to create the basis + for the minimization step. + basis_config : dict + basis configuration dictionary, which will also be modified in-place. + zeta : Union[int, None], optional + Zeta shell for which we want to optimize cutoff radius. Set it to None + if we are optimizing polarization orbitals. + polarization : bool, optional + Whether we are optimizing polarization orbitals or not. + optimized_variables : Union[dict, None], optional + Dictionary containing the already optimized variables. This is required + unless we are in the first step (optimizing the first zeta shell). + """ + for atom_key, atom_config in basis_config.items(): + + # Loop through all defined orbitals and find those that are polarization orbitals. + # We are going to perform some modifications on the basis if there are polarization + # orbitals to optimize. + for orb_key, orb_config in atom_config.items(): + + if not isinstance(orb_config, dict): + continue + + polarized_orb = orb_config.get("basis", {}).get("polarizes") + + # This orbital is not an orbital that polarizes another one. + if polarized_orb is None: + continue + + if polarization: + # We are optimizing the polarization orbital, get its cutoff radii. + if optimized_variables is None: + raise ValueError( + "To optimize the polarizing orbitals we need the optimized zeta cutoffs of the orbitals they polarize." + ) + + # Remove the specification for a default polarized orbital, since we are going + # to explicitly include it. + atom_config[polarized_orb]["basis"].pop("polarization") + + # Loop through all the zetas of the orbital that this one polarizes and + # copy the cutoffs. + zeta = 0 + while True: + zeta += 1 + + # The polarizing orbital has one less Z-shell than the + # orbital it polarizes, that's why we stop if there is no next zeta. + # SZP is the exception, because we need at least one zeta for the polarizing orbital. + next_var_key = f"{atom_key}.{polarized_orb}.z{zeta + 1}" + if zeta > 1 and next_var_key not in optimized_variables: + break + + var_key = f"{atom_key}.{polarized_orb}.z{zeta}" + orb_config["basis"][f"zeta{zeta}"] = optimized_variables[var_key] + + else: + # We are not optimizing the polarization orbitals, so we need to tell SIESTA + # to use its default polarization orbitals, for the polarized orbital. + atom_config[polarized_orb]["basis"]["polarization"] = 1 + + # Modify the basis. + basis[atom_key] = AtomBasis.from_dict(atom_config) + atom_basis = basis[atom_key] + + # Now that we have the appropiate basis for our next minimization, gather the + # variable parameters and add them to the optimizer. + for v in atom_basis.get_variables(basis_config, atom_key): + + var_prefix = ".".join(v.name.split(".")[:-1]) + + if v.name.split(".")[-1][0] == "z": + # This is a cutoff radius for a given Z. + + var_z = int(v.name.split(".")[-1][1:]) + + if polarization or (zeta is not None and var_z < zeta + 1): + # This is a variable that should have been previously optimized + # or a fixed value is provided. + if optimized_variables is not None: + v.update(optimized_variables[f"{var_prefix}.z{var_z}"]) + elif zeta is not None and var_z == zeta + 1: + if ( + var_z > 1 + and optimized_variables is not None + and f"{var_prefix}.z{var_z - 1}" in optimized_variables + ): + v.bounds[1] = ( + optimized_variables[f"{var_prefix}.z{var_z - 1}"] * 0.95 + ) + + minimizer.add_variable(v) + elif zeta is not None and var_z > zeta + 1: + v.update(0.0) + else: + # This is something that is not a cutoff radius. For now, we assume it's a variable + # for a polarization orbital. + if polarization: + minimizer.add_variable(v) + + +def write_basis(basis: Dict[str, AtomBasis], fdf: Union[str, Path]): + """ + Writes a full basis specification to an fdf file. + + Parameters + ---------- + basis : Dict[str, AtomBasis] + Dictionary with each value being the basis specification for an atom type. + fdf : Union[str, Path] + Path to the fdf file where to write the basis specification. + """ + full_basis = [] + for atom_basis in basis.values(): + full_basis.extend(atom_basis.basis()) + + sisl.io.siesta.fdfSileSiesta(fdf, "w").set("PAO.Basis", full_basis) + + +def write_basis_to_yaml( + geometry: str, + size: str = "DZP", + optimize_charge_conf: BoolOrTupleBool = False, + optimize_soft_conf: BoolOrTupleBool = False, + variable_delta: float = 0.05, + initial_basis: Optional[str] = None, + optimize_initial_basis: bool = False, + out: Optional[str] = "basis_config.yaml", +): + """Generates a basis configuration and writes it to a yaml file. + + This yaml file can then be used to run an optimizer. + + The function looks at possible pseudopotential files (first psml, then psf) to + determine which orbitals to include in the basis. Otherwise, it just generates + the valence orbitals for the usual conventions, without semi-cores. + + Parameters + ---------- + geometry : str + Path to the geometry file for which we want to generate the basis configuration. + size : str, optional + Specification of basis size. "XZ(P)" where X is the number of zetas + and P is whether to polarize or not. + + X can also be S, D, T, Q for 1, 2, 3, 4 zetas respectively. E.g. "DZ". + + It can also be "fdf" or "basis" to use the basis size as read from the `initial_basis` argument. + This forces the `initial_basis` argument to be equal to the `size` argument. + optimize_charge_conf : bool, tuple of bool, optional + Whether some orbitals will be optimized for charge-confinement. + If true, it will optimize, the charge (Q), the Yukawa screening parameter + :math:`\lambda`, and the singularity regularization parameter :math:`\delta`. + + For multiple arguments, each can be fine-tuned on/off. + + This will only optimize charge-confinement for *empty* orbitals (:math:`q=0`). + optimize_soft_conf : bool, tuple of bool, optional + Whether all orbitals will be optimized for soft-confinement. + If true, it will optimize, the potential (:math:`V_0`), and the inner + radius :math:`r_i`. + + For multiple arguments, each can be fine-tuned on/off. + variable_delta : float, optional + Delta that specifies the step size in changes to the optimized variables. + It is used differently depending on the optimizer. + initial_basis: str, optional + If provided, it should be either "fdf" or "basis", to read the initial values from + the fdf file or the basis files respectively. + + It is force to the value of `size` if "fdf" or "basis" is provided there. + optimize_initial_basis: bool, optional + If an initial basis has been provided, whether to optimize the orbitals read from it. + out: str, optional + Path to the yaml file where to write the basis configuration. + """ + + basis_read = initial_basis + if size in ("basis", "fdf"): + if initial_basis is not None and initial_basis != size: + raise ValueError( + "If `size` is 'basis' or 'fdf', `initial_basis` must be None or equal to `size`." + ) + basis_read = size + size = "atoms" + + if basis_read == "fdf": + atoms = sisl.get_sile(geometry).read_basis(order=["fdf"]) + elif basis_read == "basis": + atoms = sisl.get_sile(geometry).read_basis() + else: + atoms = sisl.get_sile(geometry).read_geometry().atoms.atom + + basis_config = generate_basis_config( + atoms, + size, + optimize_charge_conf=optimize_charge_conf, + optimize_soft_conf=optimize_soft_conf, + variable_delta=variable_delta, + initial_atoms_basis=initial_basis is not None, + optimize_atoms_basis=optimize_initial_basis, + ) + + dumps = yaml_module.dump(basis_config) + + if out is not None: + with open(out, "w") as f: + yaml_module.dump({"basis_spec": basis_config}, f) + + return dumps + + +DEFAULT_OPTIMIZER: str = "local" +if has_module("pyswarms"): + DEFAULT_OPTIMIZER = "swarms" +if has_module("pybads"): + DEFAULT_OPTIMIZER = "bads" + + +def optimize_basis( + geometry: str, + size: str = "DZP", + optimize_charge_conf: BoolOrTupleBool = False, + optimize_soft_conf: BoolOrTupleBool = False, + variable_delta: float = 0.05, + basis_spec: dict = {}, + start: str = "z1", # "z{x}" or pol + stop: str = "polarization", # "z{x}" or pol + tol_fun: float = 1e-3, + tol_stall_iters: int = 4, + siesta_cmd: str = "siesta", + out: str = "basisoptim.dat", + out_fmt: str = "20.17e", + logging_level: Literal["critical", "notset", "info", "debug"] = "notset", + optimizer: Literal["bads", "swarms", "local"] = DEFAULT_OPTIMIZER, + optimizer_kwargs: dict = {}, +): + """Optimizes a basis set for a given geometry. + + The optimization minimizes basis enthalpy. The formula for + basis enthalpy is `H = E + pV`, where E is the total energy, + V is the volume of the basis and p is a "pressure" parameter). + The pressure must be set by the user in the fdf file through + the `BasisPressure` input. + + The procedure follows this sequence: + + - First it optimizes the cutoff radii for the first zeta shell. + + - Then it optimizes the cutoff radii for the second zeta shell. + + - ... + + - Then it optimizes the cutoff radii for the last zeta shell. + + - Finally, it optimizes the polarization orbitals (if requested).\n\n + + Appropriate optimization bounds are set for each parameter.\n\n + + At each step, all corresponding parameters are optimized simultaneously.\n\n + + The optimization algorithm can be selected with the `method` argument. Note that + for all methods it is recommended to run multiple instances of the optimization + and check that they lead to the same minimum (or pick the best one). Although some + algorithms might be more reliable than others to avoid local minima. + + If no basis specification is provided, the basis specification is automatically generated + using the arguments ``size`` and ``optimize_pol``. + + To determine which orbitals to include in the basis, the function looks at possible + pseudopotential files (first psml, then psf). If they are not there, it just generates + the valence orbitals for the usual conventions, without semi-cores. + + This function only provides the simplest specification for basis shapes. If you need more + customization control, first generate a basis specification and then provide it to this + function's `basis_spec` argument. + + Parameters + ---------- + geometry : + Path to the geometry file for which we want to generate the basis configuration. + This fdf file might include other parameters to use when running SIESTA. + A sensible parameter to include in this fdf file is `BasisPressure`, which + controls how much a big basis is penalized in the basis enthalpy metric + (the higher the value, the more penalized). + + This file can't be named RUN.fdf. + size : + Specification of basis size. "XZ(P)" where X is the number of zetas + and P is whether to polarize or not. + + X can also be S, D, T, Q for 1, 2, 3, 4 zetas respectively. E.g. "DZ". + + It can also be "fdf" or "basis" to use the basis size as read from the `initial_basis` argument. + This forces the `initial_basis` argument to be equal to the `size` argument. + optimize_charge_conf : bool, tuple of bool, optional + Whether some orbitals will be optimized for charge-confinement. + If true, it will optimize, the charge (Q), the Yukawa screening parameter + :math:`\lambda`, and the singularity regularization parameter :math:`\delta`. + + For multiple arguments, each can be fine-tuned on/off. + + This will only optimize charge-confinement for *empty* orbitals (:math:`q=0`). + optimize_soft_conf : bool, tuple of bool, optional + Whether all orbitals will be optimized for soft-confinement. + If true, it will optimize, the potential (:math:`V_0`), and the inner + radius :math:`r_i`. + + For multiple arguments, each can be fine-tuned on/off. + variable_delta : + Delta that specifies the step size in changes to the optimized variables. + It is used differently depending on the optimizer. + basis_spec : + An already built basis specification dictionary. If provided, it will be used + instead of created from the previous arguments. + + You can use ``write_yaml`` to generate a template, and then tailor it to your needs. + start : + At which step to start optimizing. Previous steps will be read from previous runs. + Can be "z{x}" or "polarization" where x is the zeta shell number. + stop : + At which step to stop optimizing. Can be "z{x}" or "polarization" where x is the zeta shell number. + The optimization is stopped AFTER optimizing this step. + tol_fun : + Tolerance for the optimization function value. If the function value changes less than this + value after some iterations, the optimization is stopped. + tol_stall_iters : + Number of iterations that the function value must not change more than tol_fun to consider + the optimization ended. Note that one iteration in BADS consists of multiple function evaluations. + siesta_cmd : + Shell command to run siesta. + out : + Path to the file where to write the points evaluated by the optimizer. For each step, a file + will be generated prefixed with the step name: "{step}_{out}. + out_fmt : + Format string to use when writing the points evaluated by the optimizer. + logging_level : + Logging level to use. + optimizer : + Which optimizer to use. Can be: + + - "bads": Uses pybads to run the Bayesian Adaptive Direct Search (BADS) algorithm + for optimization. When benchmarking algorithms, BADS has proven to be the best at finding the + global minimum, this is why it is the default (if available). + - "swarms": Uses pyswarms to run a Particle Swarms optimization. + Is default if importable, and `pybads` is not available. + - "local": Uses the scipy.optimize.minimize function. + Is default if neither of `pybads` or `pyswarms` are importable. + optimizer_kwargs : + Keyword arguments to pass to the optimizer. + """ + + logging.basicConfig( + format="%(levelname)s:%(message)s", + ) # level=getattr(logging, logging_level.upper()) + # Set the log level of the sisl logger + _log.setLevel(getattr(logging, logging_level.upper())) + + optimizer_kwargs = optimizer_kwargs.copy() + + # Generate the basis specification if no basis specification is provided. + if len(basis_spec) == 0: + basis_yaml = write_basis_to_yaml( + geometry=geometry, + size=size, + optimize_charge_conf=optimize_charge_conf, + optimize_soft_conf=optimize_soft_conf, + variable_delta=variable_delta, + out=None, + ) + basis_spec = yaml_module.safe_load(basis_yaml) + + # From the basis specification, generate the basis dictionary. + atom_keys = list(basis_spec.keys()) + basis = { + atom_key: AtomBasis.from_dict(basis_spec[atom_key]) for atom_key in atom_keys + } + + # Create a RUN.fdf file that includes the geometry and the basis. + fdf_path = Path(geometry) + if fdf_path.exists() and fdf_path.name != "RUN.fdf": + with open(fdf_path.parent / "RUN.fdf", "w") as f: + f.write(f"%include BASIS.fdf\n") + f.write(f"%include {fdf_path}\n") + f.write(f"SaveRho t\n") + + run_siesta = RhoReuseSiestaRunner( + ".", cmd=siesta_cmd, stdout="RUN.out", stderr="error" + ) + run_basis_writer = FunctionRunner(write_basis, basis, run_siesta.path / "BASIS.fdf") + metric = EnergyMetric( + run_siesta.absattr("stdout"), energy="basis.enthalpy", failure=0.0 + ) + + # Define sequence of running stuff + runner = run_basis_writer & run_siesta + + # Get the number of zeta shells + n_zetas = 0 + for atom_config in basis_spec.values(): + for orb_config in atom_config.values(): + if "basis" in orb_config: + n_zetas = max( + n_zetas, + len( + [ + key + for key in orb_config["basis"].keys() + if key.startswith("zeta") + ] + ), + ) + + optimized_variables = {} + + optim_iters = [] + for zeta in range(n_zetas): + # We allow the minimizer to go below the cutoff range if it deems it necessary. + # We also allow it to go above the cutoff range for the first zeta shell. + if zeta == 0: + + def get_hard_bounds(bounds): + hard_bounds = np.full_like(bounds, 0.5) + + hard_bounds[:, 1] = np.max(bounds) + 2 + + return hard_bounds + + else: + + def get_hard_bounds(bounds): + hard_bounds = np.full_like(bounds, 0.5) + + hard_bounds[:, 1] = bounds[:, 1] + + return hard_bounds + + optim_iters.append( + { + "zeta": zeta, + "polarization": False, + "out_prefix": f"z{zeta + 1}", + "skip": not start.startswith("z") or (int(start[1:]) > zeta + 1), + "stop": stop.startswith("z") and (int(stop[1:]) < zeta + 1), + "get_hard_bounds": get_hard_bounds, + "start_info": "Otimizing zeta shell {zeta + 1}", + } + ) + + optim_iters.append( + { + "zeta": None, + "polarization": True, + "out_prefix": "polarization", + "skip": False, + "stop": True, + "get_hard_bounds": lambda bounds: bounds, + "start_info": "Otimizing polarization orbitals", + } + ) + + # Pick the optimizer + optimizer = optimizer.lower() + if optimizer == "bads": + minimizer_cls = BADSMinimizeSiesta + elif optimizer == "local": + minimizer_cls = LocalMinimizeSiesta + elif optimizer == "swarms": + minimizer_cls = ParticleSwarmsMinimizeSiesta + else: + raise ValueError(f"Unknown optimizer: {optimizer}") + + for optim_iter in optim_iters: + _log.info(optim_iter["start_info"]) + + # Get the minimizer and set the variables that it must optimize + minimize = minimizer_cls( + out=f"{optim_iter['out_prefix']}_{out}", + norm="identity", + runner=runner, + metric=metric, + out_fmt=out_fmt, + ) + set_minimizer_variables( + minimize, + basis, + basis_spec, + zeta=optim_iter["zeta"], + polarization=optim_iter["polarization"], + optimized_variables=optimized_variables, + ) + + # Check if the user wants to optimize this Z shell + if not optim_iter.get("skip", False) and len(minimize.variables) > 0: + + if optimizer == "local": + # Retrieve delta-x for the jacobian for this + eps = minimize.normalize("delta", with_offset=False) + + result = minimize.run( + tol=tol_fun, + jac="2-point", + options={ + "disp": True, + "ftol": tol_fun, + "iprint": 3, + "eps": eps, + "finite_diff_rel_step": eps, + "maxiter": 1000, + **optimizer_kwargs.pop("options", {}), + }, + **optimizer_kwargs, + ) + elif optimizer == "swarms": + result = minimize.run(**optimizer_kwargs) + else: + result = minimize.run( + get_hard_bounds=optim_iter["get_hard_bounds"], + options={ + "tol_fun": tol_fun, + "tol_stall_iters": tol_stall_iters, + **optimizer_kwargs.pop("options", {}), + }, + **optimizer_kwargs, + ) + else: + # Just make it read the data + with minimize: + pass + + if len(minimize.variables) > 0: + # Get minimum from all samples + candidates = minimize.candidates() + minimize.update(candidates.x_target) + + best_values = { + var.name: var_best + for var, var_best in zip(minimize.variables, candidates.x_target) + } + + optimized_variables.update(best_values) + + # Write the minimum to a file. + write_basis(basis, f"{optim_iter['out_prefix']}_after_minimize.fdf") + + if optim_iter.get("stop", False): + break