From 1daf5e83ee08cbbfdfa39563ea6aa2a108b8f811 Mon Sep 17 00:00:00 2001 From: Pol Febrer Date: Tue, 15 Apr 2025 00:54:06 +0200 Subject: [PATCH 1/9] Basis optimization CLI --- src/sisl/_lib/_argparse.py | 197 ++- src/sisl_toolbox/cli/_cli_imports.py | 1 + .../siesta/minimizer/_metric_siesta.py | 3 +- .../siesta/minimizer/_minimize.py | 104 +- .../siesta/minimizer/_minimize_siesta.py | 16 +- src/sisl_toolbox/siesta/minimizer/_runner.py | 2 +- .../siesta/minimizer/_variable.py | 2 +- .../siesta/minimizer/basis_optimization.py | 1170 +++++++++++++++++ 8 files changed, 1476 insertions(+), 19 deletions(-) create mode 100644 src/sisl_toolbox/siesta/minimizer/basis_optimization.py diff --git a/src/sisl/_lib/_argparse.py b/src/sisl/_lib/_argparse.py index 110efed924..3cc8e32482 100644 --- a/src/sisl/_lib/_argparse.py +++ b/src/sisl/_lib/_argparse.py @@ -1,11 +1,200 @@ # This Source Code Form is subject to the terms of the Mozilla Public # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at https://mozilla.org/MPL/2.0/. + +import argparse +import inspect +import typing +from typing import Any, Callable, Optional, Union + +from sisl._lib._docscrape import FunctionDoc + try: - from rich_argparse import RichHelpFormatter + from rich_argparse import RawTextRichHelpFormatter - SislHelpFormatter = RichHelpFormatter + SislHelpFormatter = RawTextRichHelpFormatter except ImportError: - import argparse - SislHelpFormatter = argparse.RawDescriptionHelpFormatter + + +def is_optional(field): + """Check whether the annotation for a parameter is an Optional type.""" + return typing.get_origin(field) is Union and type(None) in typing.get_args(field) + + +def get_optional_arg(field): + """Get the type of an optional argument from the typehint. + + It only works if the annotation only has one type. + + E.g.: Optional[int] -> int + E.g.: Optional[Union[int, str]] -> raises ValueError + """ + if not is_optional(field): + return field + + args = typing.get_args(field) + if len(args) > 2: + raise ValueError("Optional type must have at most 2 arguments") + for arg in args: + if arg is not type(None): + return arg + + raise ValueError("No non-None type found in Union") + + +class NotPassedArg: + """Placeholder to use for arguments that have not been passed. + + By setting this as the default value for an argument, we can + later check if the argument was passed through the CLI or not. + """ + + def __init__(self, val): + self.val = val + + def __repr__(self): + return repr(self.val) + + def __str__(self): + return str(self.val) + + def __eq__(self, other): + return self.val == other + + def __getattr__(self, name): + return getattr(self.val, name) + + +def get_runner(func): + """Wraps a function to receive the args parsed from argparse""" + + def _runner(args): + + # Get the config argument. If present, load the arguments + # from the config (yaml) file. + config = getattr(args, "config", None) + config_args = {} + if config is not None: + import yaml + + with open(args.config, "r") as f: + config_args = yaml.safe_load(f) + + # Build the final arguments dictionary, using the arguments of the + # config file as defaults. + final_args = {} + for k, v in vars(args).items(): + if k in ("runner", "config"): + continue + elif isinstance(v, NotPassedArg): + final_args[k] = config_args.get(k, v.val) + else: + final_args[k] = v + + # And call the function + return func(**final_args) + + return _runner + + +def get_argparse_parser( + func: Callable, + name: Optional[str] = None, + subp=None, + parser_kwargs: dict[str, Any] = {}, + arg_aliases: dict[str, str] = {}, + defaults: dict[str, Any] = {}, + add_config: bool = True, +) -> argparse.ArgumentParser: + """Creates an argument parser from a function's signature and docstring. + + The created argument parser just mimics the function. It is a CLI version + of the function. + + Parameters + ---------- + func : + The function to create the parser for. + name : + The name of the parser. If None, the function's name is used. + subp : + The subparser to add the parser to. If None, a new isolated + parser is created. + parser_kwargs : + Additional arguments to pass to the parser. + arg_aliases : + Dictionary holding aliases (shortcuts) for the arguments. The keys + of this dictionary are the argument names, and the values are the + aliases. For example, if the function has an argument called + `--size`, and you want to add a shortcut `-s`, you can pass + `arg_aliases={"size": "s"}`. + defaults : + Dictionary holding default values for the arguments. The keys + of this dictionary are the argument names, and the values are + the default values. The defaults are taken from the function's + signature if not specified. + add_config : + If True, adds a `--config` argument to the parser. This + argument accepts a path to a YAML file containing the + arguments for the function. + """ + + # Check if the function needs to be added as a subparser + is_sub = not subp is None + + # Get the function's information + fdoc = FunctionDoc(func) + signature = inspect.signature(func) + + # Initialize parser + title = "".join(fdoc["Summary"]) + if is_sub: + p = subp.add_parser( + name or func.__name__.replace("_", "-"), + description=title, + help=title, + **parser_kwargs, + ) + else: + p = argparse.ArgumentParser(title, **parser_kwargs) + + # Add the config argument to load the arguments from a YAML file + if add_config: + p.add_argument( + "--config", + "-c", + type=str, + default=None, + help="Path to a YAML file containing the arguments for the command", + ) + + group = p.add_argument_group("Function arguments") + + # Add all the function's parameters to the parser + parameters_help = {p.name: p.desc for p in fdoc["Parameters"]} + for param in signature.parameters.values(): + + arg_names = [f"--{param.name.replace('_', '-')}"] + if param.name in arg_aliases: + arg_names.append(f"-{arg_aliases[param.name]}") + + annotation = param.annotation + if is_optional(annotation): + annotation = get_optional_arg(annotation) + + group.add_argument( + *arg_names, + type=annotation, + default=NotPassedArg(param.default), + action=argparse.BooleanOptionalAction if annotation is bool else None, + required=param.default is inspect._empty, + help="\n".join(parameters_help.get(param.name, [])), + ) + + if is_sub: + defaults = {"runner": get_runner(func), **defaults} + + p.set_defaults(**defaults) + + return p diff --git a/src/sisl_toolbox/cli/_cli_imports.py b/src/sisl_toolbox/cli/_cli_imports.py index cb6df8f9b6..9999935b9c 100644 --- a/src/sisl_toolbox/cli/_cli_imports.py +++ b/src/sisl_toolbox/cli/_cli_imports.py @@ -9,4 +9,5 @@ import sisl_toolbox.siesta.atom # noqa: F401 +import sisl_toolbox.siesta.minimizer.basis_optimization # noqa: F401 import sisl_toolbox.transiesta.poisson # noqa: F401 diff --git a/src/sisl_toolbox/siesta/minimizer/_metric_siesta.py b/src/sisl_toolbox/siesta/minimizer/_metric_siesta.py index eb4c67c9de..b34c3207af 100644 --- a/src/sisl_toolbox/siesta/minimizer/_metric_siesta.py +++ b/src/sisl_toolbox/siesta/minimizer/_metric_siesta.py @@ -31,12 +31,13 @@ def _siesta_out_accept(out): if not isinstance(out, io_siesta.stdoutSileSiesta): out = io_siesta.stdoutSileSiesta(out) - accept = out.completed() + accept = out.info.completed if accept: with out: # We do not accept: # KBproj: WARNING: KB projector does not decay to zero accept = not out.step_to("KB projector does not decay to zero")[0] + if accept: for l in (0, 1, 2): with out: diff --git a/src/sisl_toolbox/siesta/minimizer/_minimize.py b/src/sisl_toolbox/siesta/minimizer/_minimize.py index 5ddcf89e79..f3f2b2b61b 100644 --- a/src/sisl_toolbox/siesta/minimizer/_minimize.py +++ b/src/sisl_toolbox/siesta/minimizer/_minimize.py @@ -9,6 +9,7 @@ from hashlib import sha256 from numbers import Real from pathlib import Path +from typing import Callable import numpy as np from scipy.optimize import dual_annealing, minimize @@ -22,7 +23,9 @@ "BaseMinimize", "LocalMinimize", "DualAnnealingMinimize", + "BADSMinimize", "MinimizeToDispatcher", + "ParticleSwarmsMinimize", ] _log = logging.getLogger(__name__) @@ -54,12 +57,15 @@ class BaseMinimize: # Basic minimizer basically used for figuring out whether # to use a local or global minimization strategy - def __init__(self, variables=(), out="minimize.dat", norm="identity"): + def __init__( + self, variables=(), out="minimize.dat", norm="identity", out_fmt="20.17e" + ): # ensure we have an ordered dict, for one reason or the other self.variables = [] if variables is not None: for v in variables: self.add_variable(v) + self.out_fmt = out_fmt self.reset(out, norm) def reset(self, out=None, norm=None): @@ -221,7 +227,9 @@ def __enter__(self): comment += f" The first {len(self.data)} lines contains prior content." data = np.column_stack((self.data.x, self.data.y)) self._fh = tableSile(self.out, "w").__enter__() - self._fh.write_data(data.T, comment=comment, header=header, fmt="20.17e") + self._fh.write_data( + data.T, comment=comment, header=header, fmt=self.out_fmt + ) self._fh.flush() return self @@ -288,15 +296,18 @@ def _minimize_func(self, norm_variables, *args): pass # Else we have to call minimize - metric = np.array(self(variables, *args)) - # add the data to the output file and hash it - self._fh.write_data( - variables.reshape(-1, 1), metric.reshape(-1, 1), fmt="20.17e" - ) - self._fh.flush() - self.data.x.append(variables) - self.data.y.append(metric) - self.data.hash.append(current_hash) + metric = self(variables, *args) + + if metric is not None: + metric = np.array(metric) + # add the data to the output file and hash it + self._fh.write_data( + variables.reshape(-1, 1), metric.reshape(-1, 1), fmt=self.out_fmt + ) + self._fh.flush() + self.data.x.append(variables) + self.data.y.append(metric) + self.data.hash.append(current_hash) return metric @@ -332,6 +343,77 @@ def run(self, *args, **kwargs): return _convert_optimize_result(self, opt) +@set_module("sisl_toolbox.siesta.minimizer") +class BADSMinimize(BaseMinimize): + """Bayesian Adaptive Direct Search (BADS) minimizer. + + It uses the pybads package to perform the minimization. + """ + + def run(self, options={}, get_hard_bounds: Callable = lambda x: x, **kwargs): + from pybads.bads import BADS + + bounds = self.normalize_bounds() + bounds = np.array(bounds) + + bounds[bounds == 0] = 0.5 + + hard_bounds = get_hard_bounds(bounds) + + with self: + + bads = BADS( + fun=self._minimize_func, + lower_bounds=hard_bounds[:, 0], + upper_bounds=hard_bounds[:, 1], + plausible_lower_bounds=bounds[:, 0], + plausible_upper_bounds=bounds[:, 1], + options=options, + ) + + # They do a deep copy of the arguments at the end, when generating the result. + # This might be a problem if the arguments are not pickleable. E.g. the minimizer function + # is associated with the minimizer object, which might contain siles. + try: + opt = bads.optimize() + except TypeError: + pass + + # return _convert_optimize_result(self, opt) + + +@set_module("sisl_toolbox.siesta.minimizer") +class ParticleSwarmsMinimize(BaseMinimize): + """Particle swarms optimizer. + + It uses the pyswarms package to perform the minimization.""" + + def run(self, options: dict = {}, n_particles: int = 4, n_calls=100, **kwargs): + from pyswarms.single.global_best import GlobalBestPSO + + bounds = self.normalize_bounds() + bounds = np.array(bounds).T + + options = {"c1": 0.5, "c2": 0.3, "w": 0.9, **options} + + # We will receive as many inputs as there are particles in the algorithm. + # Run them sequentially for now, although this could be parallelized. + def func(points): + return [self._minimize_func(point) for point in points] + + with self: + optimizer = GlobalBestPSO( + n_particles=n_particles, + dimensions=len(self.variables), + options=options, + bounds=bounds, + ) + + optimizer.optimize(func, iters=n_calls) + + # return _convert_optimize_result(self, opt) + + @set_module("sisl_toolbox.siesta.minimizer") class MinimizeToDispatcher(AbstractDispatch): """Base dispatcher from class passing from Minimize class""" diff --git a/src/sisl_toolbox/siesta/minimizer/_minimize_siesta.py b/src/sisl_toolbox/siesta/minimizer/_minimize_siesta.py index b0de834e07..83a5868922 100644 --- a/src/sisl_toolbox/siesta/minimizer/_minimize_siesta.py +++ b/src/sisl_toolbox/siesta/minimizer/_minimize_siesta.py @@ -15,7 +15,13 @@ from ._minimize import * from ._runner import AndRunner -__all__ = ["MinimizeSiesta", "LocalMinimizeSiesta", "DualAnnealingMinimizeSiesta"] +__all__ = [ + "MinimizeSiesta", + "LocalMinimizeSiesta", + "DualAnnealingMinimizeSiesta", + "BADSMinimizeSiesta", + "ParticleSwarmsMinimizeSiesta", +] _log = logging.getLogger(__name__) @@ -222,3 +228,11 @@ class LocalMinimizeSiesta(LocalMinimize, MinimizeSiesta): @set_module("sisl_toolbox.siesta.minimizer") class DualAnnealingMinimizeSiesta(DualAnnealingMinimize, MinimizeSiesta): pass + + +class BADSMinimizeSiesta(BADSMinimize, MinimizeSiesta): + pass + + +class ParticleSwarmsMinimizeSiesta(ParticleSwarmsMinimize, MinimizeSiesta): + pass diff --git a/src/sisl_toolbox/siesta/minimizer/_runner.py b/src/sisl_toolbox/siesta/minimizer/_runner.py index e07b3ed1ef..ffd3ba777f 100644 --- a/src/sisl_toolbox/siesta/minimizer/_runner.py +++ b/src/sisl_toolbox/siesta/minimizer/_runner.py @@ -207,7 +207,7 @@ def __init__( abs_cmd = path_abs(cmd, self.path) if abs_cmd.is_file(): self.cmd = [abs_cmd] - if not os.access(self.cmd, os.X_OK): + if not os.access(abs_cmd, os.X_OK): raise ValueError( f"{self.__class__.__name__} shell script {self.cmd.relative_to(self.path.cwd())} not executable" ) diff --git a/src/sisl_toolbox/siesta/minimizer/_variable.py b/src/sisl_toolbox/siesta/minimizer/_variable.py index 771a822a23..0a78c2369d 100644 --- a/src/sisl_toolbox/siesta/minimizer/_variable.py +++ b/src/sisl_toolbox/siesta/minimizer/_variable.py @@ -34,7 +34,7 @@ def __init__(self, name, value, **attrs): self.attrs = attrs def __getattr__(self, key): - if key in self.attrs: + if key != "attrs" and key in self.attrs: return self.attrs[key] raise AttributeError(f"could not find attribute {key}") diff --git a/src/sisl_toolbox/siesta/minimizer/basis_optimization.py b/src/sisl_toolbox/siesta/minimizer/basis_optimization.py new file mode 100644 index 0000000000..7c9e18f452 --- /dev/null +++ b/src/sisl_toolbox/siesta/minimizer/basis_optimization.py @@ -0,0 +1,1170 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +import argparse +import logging +import subprocess +from pathlib import Path +from typing import Dict, List, Optional, Tuple, Union + +import numpy as np +import yaml as yaml_module + +import sisl +from sisl._lib._argparse import get_argparse_parser +from sisl.messages import warn +from sisl_toolbox.siesta.minimizer import ( + AtomBasis, + BADSMinimizeSiesta, + EnergyMetric, + FunctionRunner, + LocalMinimizeSiesta, + ParticleSwarmsMinimizeSiesta, + SiestaRunner, +) +from sisl_toolbox.siesta.minimizer._minimize import _log +from sisl_toolbox.siesta.minimizer._yaml_reader import read_yaml + + +class RhoReuseSiestaRunner(SiestaRunner): + + def run(self): + pipe = "" + stdout, stderr = self._get_standard() + for pre, f in [(">", stdout), ("2>", stderr)]: + try: + pipe += f"{pre} {f.name}" + except Exception: + pass + cmd = [str(cmd) for cmd in self.cmd + ["<", self.fdf]] + _log.debug(f"running Siesta using command[{self.path}]: {' '.join(cmd)} {pipe}") + # Remove stuff to ensure that we don't read information from prior calculations + self.clean( + "*.ion*", "fdf.*.log", "INPUT_TMP.[0-9][0-9][0-9][0-9][0-9]" + ) # f"{self.systemlabel}.*[!fdf]" + + if (self.path / "Rho.grid.nc").exists(): + if (self.path / "Rho.IN.grid.nc").exists(): + (self.path / "Rho.IN.grid.nc").unlink() + + Path(self.path / "Rho.IN.grid.nc").symlink_to(self.path / "Rho.grid.nc") + + using_rho = False + with open(self.fdf, "r") as f: + for line in f.readlines(): + if "SCF.Read.Charge.NetCDF t" in line: + using_rho = True + break + + if not using_rho: + with open(self.fdf, "a") as f: + f.write("SCF.Read.Charge.NetCDF t\n") + + return self.hook( + subprocess.run( + cmd[:-2], + cwd=self.path, + encoding="utf-8", + stdin=open(self.fdf, "r"), + stdout=stdout, + stderr=stderr, + check=False, + ) + ) + + +# Helper functions +def get_row(Z: int) -> Tuple[int, str]: + """Get the row of the periodic table for a given atomic number. + + If the atomic number is not in the periodic table, a ValueError is raised. + + Parameters + ---------- + Z : int + atomic number + + Returns + ------- + int + row of the periodic table where the atomic number is located. First + row is index 0. + str + block of the element (s, p, d, f, g). + """ + Z = abs(Z) + + if 0 < Z <= 2: + return 0, "s" + elif Z <= 18: + rel_Z = Z - 2 + prev_rows = 1 + + if rel_Z < 2: + block = "s" + else: + block = "p" + + return ((rel_Z - 1) // 8) + prev_rows, block + elif Z <= 54: + rel_Z = Z - 18 + prev_rows = 3 + + if rel_Z < 2: + block = "s" + elif rel_Z < 12: + block = "d" + else: + block = "p" + + return ((rel_Z - 1) // 18) + prev_rows, block + elif Z <= 118: + rel_Z = Z - 54 + prev_rows = 5 + + if rel_Z < 2: + block = "s" + elif rel_Z < 17: + block = "f" + elif rel_Z < 26: + block = "d" + else: + block = "p" + + return ((rel_Z - 1) // 32) + prev_rows, block + else: + raise ValueError(f"No such atomic number in the periodic table: {Z}") + + +def get_valence_orbs_psml(psml: str): + """Get valence orbitals from psml file. + + Parameters + ---------- + psml : str + Path to the psml file. + """ + import xml.etree.ElementTree as ET + + # Get the root of the xml tree + tree = ET.parse(psml) + root = tree.getroot() + + # Iterate recursively over childs until we find the valence-configuration tag, + # which contains the shells. + shells = [] + for child in root.iter(): + if child.tag.endswith("valence-configuration"): + + for shell in child: + shells.append(shell.get("n") + shell.get("l")) + break + + return shells + + +def get_valence_orbs_psf(psf: Union[str, Path]): + """Get valence orbitals from psf file header. + + Parameters + ---------- + psf : Union[str, Path] + Path to the psf file. + """ + with open(psf, "r") as f: + + for _ in range(3): + line = f.readline() + + shells = [ + shell_spec.split(" ")[0] + for shell_spec in line.strip().split("/") + if len(shell_spec) > 0 + ] + + return shells + + +def generate_basis_config( + atoms: List[sisl.Atom], + basis_size: str = "atoms", + optimize_pol: bool = False, + pol_optimize_screening: bool = False, + pol_optimize_width: bool = False, + initial_atoms_basis: bool = False, + optimize_atoms_basis: bool = False, + cutoff_upper_bound: float = 5.5, + variable_delta: float = 0.05, +): + """Generates a basis configuration file to be fed to the optimizer. + + Non-polarization orbitals are set to optimize their cutoff radii, + while polarization orbitals are set to be optimized through the charge + confinement parameters. + + The function looks at possible pseudopotential files (first psml, then psf) to + determine which orbitals to include in the basis. Otherwise, it just generates + the valence orbitals for the usual conventions, without semicores. + + Parameters + ---------- + atoms : sisl.Atoms + Atoms object to generate the basis configuration for. + basis_size : str + Specification of basis size. "XZ(P)" where X is the number of zetas + and P is whether to polarize or not. + + X can also be S, D, T, Q for 1, 2, 3, 4 zetas respectively. E.g. "DZ". + + It can also be "atoms", which exactly takes the orbitals present in the atoms object. + optimize_pol : bool, optional + If the basis contains polarization orbitals, whether they should be + explicitly optimized or not. If not, the default polarization orbitals + will be requested. + + The polarization orbitals are optimized through the charge confinement. + pol_optimize_screening : bool, optional + If polarization orbitals are optimized, whether the screening length + of the charge confinement should be optimized or not. + pol_optimize_width : bool, optional + If polarization orbitals are optimized, whether the width of the charge confinement + should be optimized or not. + initial_atoms_basis: bool, optional + If True, the orbitals that are in the Atoms object are taken as the initial values for basis + optimization. + optimize_atoms_basis: bool, optional + If ``initial_atom_basis`` is True, whether to optimize the orbitals read from the basis or not. + cutoff_upper_bound : float, optional + Upper bound for the cutoff radii. + variable_delta : float, optional + Delta that specifies the step size in changes to the optimized variables. + It is used differently depending on the optimizer. + """ + # Parse the basis size + if basis_size != "atoms": + try: + n_zetas = int(basis_size[0]) + except: + n_zetas = { + "S": 1, + "D": 2, + "T": 3, + "Q": 4, + }[basis_size[0]] + + polarization = basis_size[-1] == "P" + + optimize_pol = optimize_pol and polarization + else: + n_zetas = None + polarization = False + optimize_pol = False + + # Helper function to determine lower bounds for cutoff radii. + def get_cutoff_lowerbound(orb_name, i_zeta): + """Returns the lower bound for the cutoff radius for a given orbital and zeta. + + The lower bound depends on which orbital it is and for which zeta the cutoff is. + """ + if i_zeta > 0: + if i_zeta == 1: + return 1 + else: + return 0 + elif orb_name == "1s": + return 2 + else: + return 2.5 + + def _orbs_from_row_and_block(table_row, atom_block): + """To be used if the orbitals to optimize can't be inferred from the pseudopotential.""" + if table_row == 0: + orbs = ["1s"] + elif table_row <= 2: + orbs = [f"{valence_n}s", f"{valence_n}p"] + elif table_row <= 4: + orbs = [f"{valence_n - 1}d", f"{valence_n}s"] + if atom_block != "d": + orbs.append(f"{valence_n}p") + else: + orbs = [f"{valence_n - 2}f", f"{valence_n - 1}d", f"{valence_n}s"] + if atom_block not in ("d", "f"): + orbs.append(f"{valence_n}p") + elif atom_block == "f": + warn( + "The orbitals of f-block atoms might not be correctly determined automatically. Please check that they are correct." + ) + + return orbs + + config_dict = {} + # Loop through atoms and generate a basis config for each, adding it to config_dict. + for atom in atoms: + table_row, atom_block = get_row(atom.Z) + + valence_n = table_row + 1 + pol_orbs = [] + + # Find out which orbitals to include for this atom. + tag = atom.tag + if basis_size == "atoms": + orbs = ["dummy"] + elif Path(f"{tag}.psml").exists(): + # From the psml pseudopotential + orbs = get_valence_orbs_psml(f"{tag}.psml") + else: + # If we didn't find any psml file, we just generate the valence orbitals using + # the row and block of the atom. + orbs = _orbs_from_row_and_block(table_row, atom_block) + + # And then look for psf pseudos to see if we need to add semicores. + # We can't directly infer valence orbitals from psf because the headers + # also contain the polarization orbitals. + psf_files = list(Path().glob(f"{tag}.psf")) + list( + Path().glob(f"{tag}.*.psf") + ) + + if len(psf_files) > 0: + psf_orbs = get_valence_orbs_psf(psf_files[0]) + + # The psf file contains also polarization orbitals, + # so we need to remove them. What we do actually is to check if there are + # lower shells than what we created and prepend them to the list of orbitals. + i = 0 + for i, psf_orb in enumerate(psf_orbs): + if psf_orb in orbs: + break + orbs = [*psf_orbs[:i], *orbs] + + orb_to_polarize = orbs[-1] + # Add polarization orbitals + if polarization: + n = int(orb_to_polarize[0]) + l = "spdfg".find(orb_to_polarize[1]) + + pol_l = l + 1 + pol_n = max(n, pol_l + 1) + + pol_orbs = [f"{pol_n}{'spdfg'[pol_l]}"] + + # We use the tag of the atom as the key for its basis entry. + element_config = config_dict[tag] = { + "element": tag, + } + + # Read in the initial basis if requested. + provided_basis = {} + if initial_atoms_basis or basis_size == "atoms": + + for orbital in atom.orbitals: + name = orbital.name() + orb_name, zeta = name[:2], int(name.split("Z")[1][0]) + + provided_basis[orb_name, zeta] = orbital.R + + # Add this orbital to the basis if we are requested to set up the + # basis from the atoms object. + if basis_size == "atoms": + if orb_name not in element_config: + element_config[orb_name] = {"basis": {}} + + basis = element_config[orb_name]["basis"] + + if optimize_atoms_basis: + basis[f"zeta{zeta}"].update( + { + "initial": orbital.R if orbital.R >= 0 else 3.0, + "bounds": [ + get_cutoff_lowerbound(orb_name, zeta), + cutoff_upper_bound, + ], + "delta": variable_delta, + } + ) + else: + basis[f"zeta{zeta}"] = orbital.R + + if basis_size == "atoms": + # We have already set up the basis from the atoms. + continue + + # If we are here, it means that the size of the basis is not determined by the provided + # atoms object, that is, basis size is something like DZP. + + def get_cutoff_upper_bound(orb_name, i_zeta): + if (orb_name, i_zeta) in provided_basis: + return provided_basis[(orb_name, i_zeta)] * 0.95 + else: + return cutoff_upper_bound + + # Loop through non-polarization orbitals and add them to the basis. + for orb_name in orbs: + orb_basis = {} + + if polarization and not optimize_pol: + if orb_name == orb_to_polarize: + orb_basis["pol"] = 1 + + for i_zeta in range(n_zetas): + orb_id = (orb_name, i_zeta + 1) + + orb_basis[f"zeta{i_zeta + 1}"] = { + "initial": provided_basis.get(orb_id, 3.0), + } + + if orb_id not in provided_basis: + # Orbital not provided, we need to optimize it from scratch. + orb_basis[f"zeta{i_zeta + 1}"].update( + { + "bounds": [ + get_cutoff_lowerbound(orb_name, i_zeta), + get_cutoff_upper_bound(orb_name, i_zeta), + ], + "delta": variable_delta, + } + ) + else: + # Orbital provided, we don't need to optimize it. + orb_basis[f"zeta{i_zeta + 1}"] = provided_basis[orb_id] + + element_config[orb_name] = {"basis": orb_basis} + + # Loop through polarization orbitals and add them to the basis. + if polarization: + for orb_name in pol_orbs: + + # Check if the polarization orbital is already in the provided basis. + if (orb_name, 1) in provided_basis: + # Orbital provided in the basis + pol_orb_basis = {} + + zeta = 1 + while (orb_name, zeta) in provided_basis: + pol_orb_basis[f"zeta{zeta}"] = provided_basis[(orb_name, zeta)] + + zeta += 1 + + elif optimize_pol: + # Orbital not provided in the basis, and we need to optimize it. + pol_orb_basis = { + "polarizes": orb_to_polarize, + "charge-confinement": { + "charge": { + "initial": 3.0, + "bounds": [1.0, 10.0], + "delta": variable_delta, + }, + }, + } + + charge_confinement = pol_orb_basis["charge-confinement"] + + if pol_optimize_screening: + # Screening length (Ang-1) + charge_confinement["yukawa"] = { + "initial": 0.0, + "bounds": [0.0, 3.0], + "delta": 0.5, + } + if pol_optimize_width: + # Width for the potential (Ang) + charge_confinement["width"] = { + "initial": 0.0053, + "bounds": [0.0, 0.5], + "delta": 0.05, + } + + else: + continue + + element_config[orb_name] = {"basis": pol_orb_basis} + + return config_dict + + +def set_minimizer_variables( + minimizer, + basis: Dict[str, AtomBasis], + basis_config: dict, + zeta: Union[int, None] = None, + polarization: bool = False, + optimized_variables: Union[dict, None] = None, +): + """Sets the variables that the minimizer must optimize. + + A basis will contain all variables, but they must be optimized in a certain order. + For each step, this function must be called to adapt the basis and set the variables + on the minimizer. + + Parameters + ---------- + minimizer : Minimizer + Minimizer where to add the variables. + basis : Dict[str, AtomBasis] + Basis dictionary, which will be modified in-place to create the basis + for the minimization step. + basis_config : dict + basis configuration dictionary, which will also be modified in-place. + zeta : Union[int, None], optional + Zeta shell for which we want to optimize cutoff radius. Set it to None + if we are optimizing polarization orbitals. + polarization : bool, optional + Whether we are optimizing polarization orbitals or not. + optimized_variables : Union[dict, None], optional + Dictionary containing the already optimized variables. This is required + unless we are in the first step (optimizing the first zeta shell). + """ + for atom_key, atom_config in basis_config.items(): + + # Loop through all defined orbitals and find those that are polarization orbitals. + # We are going to perform some modifications on the basis if there are polarization + # orbitals to optimize. + for orb_key, orb_config in atom_config.items(): + + if not isinstance(orb_config, dict): + continue + + polarized_orb = orb_config.get("basis", {}).get("polarizes") + + # This orbital is not an orbital that polarizes another one. + if polarized_orb is None: + continue + + if polarization: + # We are optimizing the polarization orbital, get its cutoff radii. + if optimized_variables is None: + raise ValueError( + "To optimize the polarizing orbitals we need the optimized zeta cutoffs of the orbitals they polarize." + ) + + # Remove the specification for a default polarized orbital, since we are going + # to explicitly include it. + atom_config[polarized_orb]["basis"].pop("pol") + + # Loop through all the zetas of the orbital that this one polarizes and + # copy the cutoffs. + zeta = 0 + while True: + zeta += 1 + + # The polarizing orbital has one less Z-shell than the + # orbital it polarizes, that's why we stop if there is no next zeta. + # SZP is the exception, because we need at least one zeta for the polarizing orbital. + next_var_key = f"{atom_key}.{polarized_orb}.z{zeta + 1}" + if zeta > 1 and next_var_key not in optimized_variables: + break + + var_key = f"{atom_key}.{polarized_orb}.z{zeta}" + orb_config["basis"][f"zeta{zeta}"] = optimized_variables[var_key] + + else: + # We are not optimizing the polarization orbitals, so we need to tell SIESTA + # to use its default polarization orbitals, for the polarized orbital. + atom_config[polarized_orb]["basis"]["pol"] = 1 + + # Modify the basis. + basis[atom_key] = AtomBasis.from_dict(atom_config) + + atom_basis = basis[atom_key] + + # Now that we have the appropiate basis for our next minimization, gather the + # variable parameters and add them to the optimizer. + for v in atom_basis.get_variables(basis_config, atom_key): + + var_prefix = ".".join(v.name.split(".")[:-1]) + + if v.name.split(".")[-1][0] == "z": + # This is a cutoff radius for a given Z. + + var_z = int(v.name.split(".")[-1][1:]) + + if polarization or (zeta is not None and var_z < zeta + 1): + # This is a variable that should have been previously optimized + # or a fixed value is provided. + if optimized_variables is not None: + v.update(optimized_variables[f"{var_prefix}.z{var_z}"]) + elif zeta is not None and var_z == zeta + 1: + if ( + var_z > 1 + and optimized_variables is not None + and f"{var_prefix}.z{var_z - 1}" in optimized_variables + ): + v.bounds[1] = ( + optimized_variables[f"{var_prefix}.z{var_z - 1}"] * 0.95 + ) + + minimizer.add_variable(v) + elif zeta is not None and var_z > zeta + 1: + v.update(0.0) + else: + # This is something that is not a cutoff radius. For now, we assume it's a variable + # for a polarization orbital. + if polarization: + minimizer.add_variable(v) + + +def write_basis(basis: Dict[str, AtomBasis], fdf: Union[str, Path]): + """ + Writes a full basis specification to an fdf file. + + Parameters + ---------- + basis : Dict[str, AtomBasis] + Dictionary with each value being the basis specification for an atom type. + fdf : Union[str, Path] + Path to the fdf file where to write the basis specification. + """ + full_basis = [] + for atom_key, atom_basis in basis.items(): + full_basis.extend(atom_basis.basis()) + + sisl.io.siesta.fdfSileSiesta(fdf, "w").set("PAO.Basis", full_basis) + + +def write_basis_to_yaml( + geometry: str, + size: str = "DZP", + optimize_pol: bool = False, + pol_optimize_screening: bool = False, + pol_optimize_width: bool = False, + variable_delta: float = 0.05, + initial_basis: Optional[str] = None, + optimize_initial_basis: bool = False, + out: Optional[str] = "basis_config.yaml", +): + """Generates a basis configuration and writes it to a yaml file. + + This yaml file can then be used to run an optimizer. + + The function looks at possible pseudopotential files (first psml, then psf) to + determine which orbitals to include in the basis. Otherwise, it just generates + the valence orbitals for the usual conventions, without semicores. + + Parameters + ---------- + geometry : str + Path to the geometry file for which we want to generate the basis configuration. + size : str, optional + Specification of basis size. "XZ(P)" where X is the number of zetas + and P is whether to polarize or not. + + X can also be S, D, T, Q for 1, 2, 3, 4 zetas respectively. E.g. "DZ". + + It can also be "fdf" or "basis" to use the basis size as read from the `initial_basis` argument. + This forces the `initial_basis` argument to be equal to the `size` argument. + optimize_pol : bool, optional + If the basis contains polarization orbitals, whether they should be + explicitly optimized or not. If not, the default polarization orbitals + will be requested. + pol_optimize_screening : bool, optional + If polarization orbitals are optimized, whether the screening length + of the charge confinement should be optimized or not. + pol_optimize_width : bool, optional + If polarization orbitals are optimized, whether the width of the charge confinement + should be optimized or not. + variable_delta : float, optional + Delta that specifies the step size in changes to the optimized variables. + It is used differently depending on the optimizer. + initial_basis: str, optional + If provided, it should be either "fdf" or "basis", to read the initial values from + the fdf file or the basis files respectively. + + It is force to the value of `size` if "fdf" or "basis" is provided there. + optimize_initial_basis: bool, optional + If an initial basis has been provided, whether to optimize the orbitals read from it. + out: str, optional + Path to the yaml file where to write the basis configuration. + """ + + basis_read = initial_basis + if size in ("basis", "fdf"): + if initial_basis is not None and initial_basis != size: + raise ValueError( + "If `size` is 'basis' or 'fdf', `initial_basis` must be None or equal to `size`." + ) + basis_read = size + size = "atoms" + + if basis_read == "fdf": + atoms = sisl.get_sile(geometry).read_basis(order=["fdf"]) + elif basis_read == "basis": + atoms = sisl.get_sile(geometry).read_basis() + else: + atoms = sisl.get_sile(geometry).read_geometry().atoms.atom + + basis_config = generate_basis_config( + atoms, + size, + optimize_pol=optimize_pol, + pol_optimize_screening=pol_optimize_screening, + pol_optimize_width=pol_optimize_width, + variable_delta=variable_delta, + initial_atoms_basis=initial_basis is not None, + optimize_atoms_basis=optimize_initial_basis, + ) + + dumps = yaml_module.dump(basis_config) + + if out is not None: + with open(out, "w") as f: + yaml_module.dump({"basis_spec": basis_config}, f) + + return dumps + + +def optimize_basis( + geometry: str, + size: str = "DZP", + optimize_pol: bool = False, + variable_delta: float = 0.05, + basis_spec: dict = {}, + start: str = "z1", # "z{x}" or pol + stop: str = "pol", # "z{x}" or pol + tol_fun: float = 1e-3, + tol_stall_iters: int = 4, + siesta_cmd: str = "siesta", + out: str = "basisoptim.dat", + out_fmt: str = "20.17e", + logging_level: str = "notset", # Literal["critical", "notset", "info", "debug"] + optimizer: str = "bads", + optimizer_kwargs: dict = {}, +): + """Optimizes a basis set for a given geometry. + + The optimization follows the below sequence: + + - First it optimizes the cutoff radii for the first zeta shell. + + - Then it optimizes the cutoff radii for the second zeta shell. + + - ... + + - Then it optimizes the cutoff radii for the last zeta shell. + + - Finally, it optimizes the polarization orbitals (if requested).\n\n + + Appropiate optimization bounds are set for each parameter.\n\n + + At each step, all corresponding parameters are optimized simultaneously.\n\n + + The optimization algorithm can be selected with the `method` argument. Note that + for all methods it is recommended to run multiple instances of the optimization + and check that they lead to the same minimum (or pick the best one). Although some + algorithms might be more reliable than others to avoid local minima. + + If no basis specification is provided, the basis specification is automatically generated + using the arguments ``size`` and ``optimize_pol``. + + To determine which orbitals to include in the basis, the function looks at possible + pseudopotential files (first psml, then psf). If they are not there, it just generates + the valence orbitals for the usual conventions, without semicores. + + This function only provides the simplest specification for basis shapes. If you need more + customization control, first generate a basis specification and then provide it to this + function's `basis_spec` argument. + + Parameters + ---------- + geometry : str + Path to the geometry file for which we want to generate the basis configuration. + This fdf file might include other parameters to use when running SIESTA. It can't be + named RUN.fdf, though. + size : str, optional + Specification of basis size. "XZ(P)" where X is the number of zetas + and P is whether to polarize or not. + + X can also be S, D, T, Q for 1, 2, 3, 4 zetas respectively. E.g. "DZ". + + It can also be "fdf" or "basis" to use the basis size as read from the `initial_basis` argument. + This forces the `initial_basis` argument to be equal to the `size` argument. + optimize_pol : bool, optional + If the basis contains polarization orbitals, whether they should be + explicitly optimized or not. If not, the default polarization orbitals + will be requested. + variable_delta : float, optional + Delta that specifies the step size in changes to the optimized variables. + It is used differently depending on the optimizer. + basis_spec : dict, optional + An already built basis specification dictionary. If provided, it will be used + instead of created from the previous arguments. + + You can use ``write_yaml`` to generate a template, and then tailor it to your needs. + start : str, optional + At which step to start optimizing. Previous steps will be read from previous runs. + Can be "z{x}" or "pol" where x is the zeta shell number. + stop : str, optional + At which step to stop optimizing. Can be "z{x}" or "pol" where x is the zeta shell number. + The optimization is stopped AFTER optimizing this step. + tol_fun : float, optional + Tolerance for the optimization function value. If the function value changes less than this + value after some iterations, the optimization is stopped. + tol_stall_iters : int, optional + Number of iterations that the function value must not change more than tol_fun to consider + the optimization ended. Note that one iteration in BADS consists of multiple function evaluations. + siesta_cmd : str, optional + Shell command to run siesta. + out : str, optional + Path to the file where to write the points evaluated by the optimizer. For each step, a file + will be generated prefixed with the step name: "{step}_{out}. + out_fmt : str, optional + Format string to use when writing the points evaluated by the optimizer. + logging_level : str, optional + Logging level to use. Can be "critical", "notset", "info" or "debug". + optimizer : str, optional + Which optimizer to use. Can be: + - "bads": Uses pybads to run the Bayesian Adaptive Direct Search (BADS) algorithm + for optimization. When benchmarking algorithms, BADS has proven to be the best at finding the + global minimum, this is why it is the default. + - "swarms": Uses pyswarms to run a Particle Swarms optimization. + - "local": Uses the scipy.optimize.minimize function. + optimizer_kwargs : dict, optional + Keyword arguments to pass to the optimizer. + """ + + logging.basicConfig( + format="%(levelname)s:%(message)s", + ) # level=getattr(logging, logging_level.upper()) + # Set the log level of the sisl logger + _log.setLevel(getattr(logging, logging_level.upper())) + + optimizer_kwargs = optimizer_kwargs.copy() + + # Generate the basis specification if no basis specification is provided. + if len(basis_spec) == 0: + basis_yaml = write_basis_to_yaml( + geometry=geometry, + size=size, + optimize_pol=optimize_pol, + variable_delta=variable_delta, + out=None, + ) + basis_spec = yaml_module.safe_load(basis_yaml) + + # From the basis specification, generate the basis dictionary. + atom_keys = list(basis_spec.keys()) + basis = { + atom_key: AtomBasis.from_dict(basis_spec[atom_key]) for atom_key in atom_keys + } + + # Create a RUN.fdf file that includes the geometry and the basis. + fdf_path = Path(geometry) + if fdf_path.exists() and fdf_path.name != "RUN.fdf": + with open(fdf_path.parent / "RUN.fdf", "w") as f: + f.write(f"%include BASIS.fdf\n") + f.write(f"%include {fdf_path}\n") + f.write(f"SaveRho t\n") + + run_siesta = RhoReuseSiestaRunner( + ".", cmd=siesta_cmd, stdout="RUN.out", stderr="error" + ) + run_basis_writer = FunctionRunner(write_basis, basis, run_siesta.path / "BASIS.fdf") + metric = EnergyMetric( + run_siesta.absattr("stdout"), energy="basis.enthalpy", failure=0.0 + ) + + # Define sequence of running stuff + runner = run_basis_writer & run_siesta + + # Get the number of zeta shells + n_zetas = 0 + for atom_config in basis_spec.values(): + for orb_config in atom_config.values(): + if "basis" in orb_config: + n_zetas = max( + n_zetas, + len( + [ + key + for key in orb_config["basis"].keys() + if key.startswith("zeta") + ] + ), + ) + + optimized_variables = {} + + optim_iters = [] + for zeta in range(n_zetas): + # We allow the minimizer to go below the cutoff range if it deems it necessary. + # We also allow it to go above the cutoff range for the first zeta shell. + if zeta == 0: + + def get_hard_bounds(bounds): + hard_bounds = np.full_like(bounds, 0.5) + + hard_bounds[:, 1] = np.max(bounds) + 2 + + return hard_bounds + + else: + + def get_hard_bounds(bounds): + hard_bounds = np.full_like(bounds, 0.5) + + hard_bounds[:, 1] = bounds[:, 1] + + return hard_bounds + + optim_iters.append( + { + "zeta": zeta, + "polarization": False, + "out_prefix": f"z{zeta + 1}", + "skip": not start.startswith("z") or (int(start[1:]) > zeta + 1), + "stop": stop.startswith("z") and (int(stop[1:]) < zeta + 1), + "get_hard_bounds": get_hard_bounds, + "start_info": "Otimizing zeta shell {zeta + 1}", + } + ) + + optim_iters.append( + { + "zeta": None, + "polarization": True, + "out_prefix": "pol", + "skip": False, + "stop": True, + "get_hard_bounds": lambda bounds: bounds, + "start_info": "Otimizing polarization orbitals", + } + ) + + # Pick the optimizer + optimizer = optimizer.lower() + if optimizer == "bads": + minimizer_cls = BADSMinimizeSiesta + elif optimizer == "local": + minimizer_cls = LocalMinimizeSiesta + elif optimizer == "swarms": + minimizer_cls = ParticleSwarmsMinimizeSiesta + else: + raise ValueError(f"Unknown optimizer: {optimizer}") + + for optim_iter in optim_iters: + _log.info(optim_iter["start_info"]) + + # Get the minimizer and set the variables that it must optimize + minimize = minimizer_cls( + out=f"{optim_iter['out_prefix']}_{out}", + norm="identity", + runner=runner, + metric=metric, + out_fmt=out_fmt, + ) + set_minimizer_variables( + minimize, + basis, + basis_spec, + zeta=optim_iter["zeta"], + polarization=optim_iter["polarization"], + optimized_variables=optimized_variables, + ) + + # Check if the user wants to optimize this Z shell + if not optim_iter.get("skip", False) and len(minimize.variables) > 0: + + if optimizer == "local": + # Retrieve delta-x for the jacobian for this + eps = minimize.normalize("delta", with_offset=False) + + result = minimize.run( + tol=tol_fun, + jac="2-point", + options={ + "disp": True, + "ftol": tol_fun, + "iprint": 3, + "eps": eps, + "finite_diff_rel_step": eps, + "maxiter": 1000, + **optimizer_kwargs.pop("options", {}), + }, + **optimizer_kwargs, + ) + elif optimizer == "swarms": + result = minimize.run(**optimizer_kwargs) + else: + result = minimize.run( + get_hard_bounds=optim_iter["get_hard_bounds"], + options={ + "tol_fun": tol_fun, + "tol_stall_iters": tol_stall_iters, + **optimizer_kwargs.pop("options", {}), + }, + **optimizer_kwargs, + ) + else: + # Just make it read the data + with minimize: + pass + + if len(minimize.variables) > 0: + # Get minimum from all samples + candidates = minimize.candidates() + minimize.update(candidates.x_target) + + best_values = { + var.name: var_best + for var, var_best in zip(minimize.variables, candidates.x_target) + } + + optimized_variables.update(best_values) + + # Write the minimum to a file. + write_basis(basis, f"{optim_iter['out_prefix']}_after_minimize.fdf") + + if optim_iter.get("stop", False): + break + + +def basis_cli(subp=None, parser_kwargs={}): + """Argparse CLI for the basis optimization utilities.""" + + # Create main parser + title = "Basis optimization utilities" + is_sub = not subp is None + if is_sub: + p = subp.add_parser("basis", description=title, help=title, **parser_kwargs) + else: + p = argparse.ArgumentParser(title, description=title, **parser_kwargs) + + # If the main parser is executed, just print the help + p.set_defaults(runner=lambda args: p.print_help()) + + # Add the subparsers for the commands + subp = p.add_subparsers(title="Commands") + + get_argparse_parser( + optimize_basis, name="optim", subp=subp, parser_kwargs=parser_kwargs + ) + get_argparse_parser( + write_basis_to_yaml, name="build", subp=subp, parser_kwargs=parser_kwargs + ) + + +# Import object holding all the CLI +from sisl_toolbox.cli import register_toolbox_cli + +register_toolbox_cli(basis_cli) + +# def get_typer_app(): +# """Returns a typer app to use as a CLI for basis optimization.""" +# import typer + +# def yaml_dict(d: str): + +# if isinstance(d, dict): +# return d + +# return yaml_module.safe_load(d) + +# # Helper function to annotate a function with the help from the docstring. +# # In this way, typer can display the help on the command line. +# def _annotate_func(func): +# import inspect +# from copy import copy + +# from typing_extensions import Annotated + +# params_help = {} + +# in_parameters = False +# read_key = None +# arg_content = "" + +# for line in func.__doc__.split("\n"): +# if "Parameters" in line: +# in_parameters = True +# space = line.find("Parameters") +# continue + +# if in_parameters: +# if len(line) < space + 1: +# continue +# if len(line) > 1 and line[0] != " ": +# break + +# if line[space] not in (" ", "-"): +# if read_key is not None: +# params_help[read_key] = arg_content + +# read_key = line.split(":")[0].strip() +# arg_content = "" +# else: +# if arg_content == "": +# arg_content = line.strip().capitalize() +# else: +# arg_content += " " + line.strip() + +# if line.startswith("------"): +# break + +# if read_key is not None: +# params_help[read_key] = arg_content + +# @functools.wraps(func) +# def wrapper(*args, config: str = "", **kwargs): +# return func(*args, **kwargs) + +# def config_callback(ctx, param, param_value): + +# if param_value != "": +# with open(param_value, "r") as f: +# yaml_config = yaml_module.safe_load(f) + +# ctx.default_map = ctx.default_map or {} +# ctx.default_map.update(yaml_config) + +# sig = inspect.signature(func) + +# new_parameters = [] +# for param in sig.parameters.values(): + +# parser = None +# if param.annotation == dict: +# parser = yaml_dict + +# typer_arg_cls = ( +# typer.Argument +# if param.default == inspect.Parameter.empty +# else typer.Option +# ) +# new_parameters.append( +# param.replace( +# annotation=Annotated[ +# param.annotation, +# typer_arg_cls(help=params_help.get(param.name), parser=parser), +# ] +# ) +# ) + +# new_parameters.append( +# inspect.Parameter( +# "config", +# default="", +# kind=inspect.Parameter.KEYWORD_ONLY, +# annotation=Annotated[ +# str, +# typer.Option( +# "--config", +# "-c", +# help="YAML file to load the configuration from.", +# is_eager=True, +# callback=config_callback, +# ), +# ], +# ) +# ) + +# wrapper.__signature__ = sig.replace(parameters=new_parameters) +# wrapper.__doc__ = func.__doc__[: func.__doc__.find("Parameters\n")] + +# return wrapper + +# # Create the app. +# app = typer.Typer(name="Basis optimization utilities", rich_markup_mode="markdown") + +# app.command("write-yaml")(_annotate_func(write_basis_to_yaml)) +# app.command("optimize")(_annotate_func(optimize_basis)) + +# return app From 83a6aabbeb01ff65e34ec14b6ae57509cab014f3 Mon Sep 17 00:00:00 2001 From: Pol Febrer Date: Tue, 15 Apr 2025 14:49:30 +0200 Subject: [PATCH 2/9] Long description for the parser --- src/sisl/_lib/_argparse.py | 24 ++- src/sisl_toolbox/cli/_cli.py | 9 +- src/sisl_toolbox/cli/_cli_imports.py | 4 +- src/sisl_toolbox/siesta/atom/_atom.py | 54 ----- src/sisl_toolbox/siesta/atom/_register_cli.py | 60 ++++++ src/sisl_toolbox/siesta/minimizer/__init__.py | 1 + .../siesta/minimizer/_register_cli.py | 39 ++++ .../siesta/minimizer/basis_optimization.py | 192 ++---------------- 8 files changed, 151 insertions(+), 232 deletions(-) create mode 100644 src/sisl_toolbox/siesta/atom/_register_cli.py create mode 100644 src/sisl_toolbox/siesta/minimizer/_register_cli.py diff --git a/src/sisl/_lib/_argparse.py b/src/sisl/_lib/_argparse.py index 3cc8e32482..0c860459b2 100644 --- a/src/sisl/_lib/_argparse.py +++ b/src/sisl/_lib/_argparse.py @@ -5,7 +5,7 @@ import argparse import inspect import typing -from typing import Any, Callable, Optional, Union +from typing import Any, Callable, Literal, Optional, Union from sisl._lib._docscrape import FunctionDoc @@ -22,6 +22,11 @@ def is_optional(field): return typing.get_origin(field) is Union and type(None) in typing.get_args(field) +def is_literal(field): + """Check whether the annotation for a parameter is a Literal type.""" + return typing.get_origin(field) is Literal + + def get_optional_arg(field): """Get the type of an optional argument from the typehint. @@ -43,6 +48,14 @@ def get_optional_arg(field): raise ValueError("No non-None type found in Union") +def get_literal_args(field): + """Get the values of a literal. + + E.g.: Literal[1, 2, 3] -> (1, 2, 3) + """ + return typing.get_args(field) + + class NotPassedArg: """Placeholder to use for arguments that have not been passed. @@ -149,10 +162,11 @@ def get_argparse_parser( # Initialize parser title = "".join(fdoc["Summary"]) + parser_help = "\n".join(fdoc["Extended Summary"]) if is_sub: p = subp.add_parser( name or func.__name__.replace("_", "-"), - description=title, + description=parser_help, help=title, **parser_kwargs, ) @@ -183,10 +197,16 @@ def get_argparse_parser( if is_optional(annotation): annotation = get_optional_arg(annotation) + choices = None + if is_literal(annotation): + choices = get_literal_args(annotation) + annotation = type(choices[0]) + group.add_argument( *arg_names, type=annotation, default=NotPassedArg(param.default), + choices=choices, action=argparse.BooleanOptionalAction if annotation is bool else None, required=param.default is inspect._empty, help="\n".join(parameters_help.get(param.name, [])), diff --git a/src/sisl_toolbox/cli/_cli.py b/src/sisl_toolbox/cli/_cli.py index a6984cf39a..e84c9da8da 100644 --- a/src/sisl_toolbox/cli/_cli.py +++ b/src/sisl_toolbox/cli/_cli.py @@ -44,8 +44,7 @@ def register(self, setup): """ self._cmds.append(setup) - def __call__(self, argv=None): - + def get_parser(self): # Create command-line cmd = Path(sys.argv[0]) p = argparse.ArgumentParser( @@ -70,6 +69,12 @@ def __call__(self, argv=None): for cmd in self._cmds: cmd(subp, parser_kwargs=dict(formatter_class=p.formatter_class)) + return p + + def __call__(self, argv=None): + + p = self.get_parser() + args = p.parse_args(argv) args.runner(args) diff --git a/src/sisl_toolbox/cli/_cli_imports.py b/src/sisl_toolbox/cli/_cli_imports.py index 9999935b9c..3276034145 100644 --- a/src/sisl_toolbox/cli/_cli_imports.py +++ b/src/sisl_toolbox/cli/_cli_imports.py @@ -8,6 +8,6 @@ __all__ = [] -import sisl_toolbox.siesta.atom # noqa: F401 -import sisl_toolbox.siesta.minimizer.basis_optimization # noqa: F401 +import sisl_toolbox.siesta.atom._register_cli # noqa: F401 +import sisl_toolbox.siesta.minimizer._register_cli # noqa: F401 import sisl_toolbox.transiesta.poisson # noqa: F401 diff --git a/src/sisl_toolbox/siesta/atom/_atom.py b/src/sisl_toolbox/siesta/atom/_atom.py index 807a32b4be..828f3912a6 100644 --- a/src/sisl_toolbox/siesta/atom/_atom.py +++ b/src/sisl_toolbox/siesta/atom/_atom.py @@ -42,9 +42,6 @@ __all__ = ["AtomInput", "atom_plot_cli"] - -_script = Path(sys.argv[0]).name - # We don't use the Siesta units here... _Ang2Bohr = si.units.convert("Ang", "Bohr") @@ -803,57 +800,6 @@ def next_rc(ir, ic, nrows, ncols): return fig, axs -def atom_plot_cli(subp=None, parser_kwargs={}): - """Run plotting command for the output of atom""" - - is_sub = not subp is None - - title = "Plotting facility for atom output (run in the atom output directory)" - if is_sub: - global _script - _script = f"{_script} atom-plot" - p = subp.add_parser("atom-plot", description=title, help=title, **parser_kwargs) - else: - import argparse - - p = argparse.ArgumentParser(title, **parser_kwargs) - - p.add_argument( - "--plot", - "-P", - action="append", - type=str, - choices=("wavefunction", "charge", "log", "potential"), - help="""Determine what to plot""", - ) - - p.add_argument("-l", default="spdf", type=str, help="""Which l shells to plot""") - - p.add_argument("--save", "-S", default=None, help="""Save output plots to file.""") - - p.add_argument( - "--show", - default=False, - action="store_true", - help="""Force showing the plot (only if --save is specified)""", - ) - - p.add_argument( - "input", type=str, default="INP", help="""Input file name (default INP)""" - ) - - if is_sub: - p.set_defaults(runner=atom_plot) - else: - atom_plot(p.parse_args()) - - -# Import object holding all the CLI -from sisl_toolbox.cli import register_toolbox_cli - -register_toolbox_cli(atom_plot_cli) - - @set_module("sisl_toolbox.siesta.atom") def atom_plot(args): import matplotlib.pyplot as plt diff --git a/src/sisl_toolbox/siesta/atom/_register_cli.py b/src/sisl_toolbox/siesta/atom/_register_cli.py new file mode 100644 index 0000000000..a5ee737328 --- /dev/null +++ b/src/sisl_toolbox/siesta/atom/_register_cli.py @@ -0,0 +1,60 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +# Import object holding all the CLI +import sys +from pathlib import Path + +from sisl_toolbox.cli import register_toolbox_cli + +from ._atom import atom_plot + +_script = Path(sys.argv[0]).name + + +def atom_plot_cli(subp=None, parser_kwargs={}): + """Run plotting command for the output of atom""" + + is_sub = not subp is None + + title = "Plotting facility for atom output (run in the atom output directory)" + if is_sub: + global _script + _script = f"{_script} atom-plot" + p = subp.add_parser("atom-plot", description=title, help=title, **parser_kwargs) + else: + import argparse + + p = argparse.ArgumentParser(title, **parser_kwargs) + + p.add_argument( + "--plot", + "-P", + action="append", + type=str, + choices=("wavefunction", "charge", "log", "potential"), + help="""Determine what to plot""", + ) + + p.add_argument("-l", default="spdf", type=str, help="""Which l shells to plot""") + + p.add_argument("--save", "-S", default=None, help="""Save output plots to file.""") + + p.add_argument( + "--show", + default=False, + action="store_true", + help="""Force showing the plot (only if --save is specified)""", + ) + + p.add_argument( + "input", type=str, default="INP", help="""Input file name (default INP)""" + ) + + if is_sub: + p.set_defaults(runner=atom_plot) + else: + atom_plot(p.parse_args()) + + +register_toolbox_cli(atom_plot_cli) diff --git a/src/sisl_toolbox/siesta/minimizer/__init__.py b/src/sisl_toolbox/siesta/minimizer/__init__.py index ee7b1c1e81..adf4b96751 100644 --- a/src/sisl_toolbox/siesta/minimizer/__init__.py +++ b/src/sisl_toolbox/siesta/minimizer/__init__.py @@ -103,3 +103,4 @@ from ._minimize_siesta import * from ._runner import * from ._variable import * +from .basis_optimization import * diff --git a/src/sisl_toolbox/siesta/minimizer/_register_cli.py b/src/sisl_toolbox/siesta/minimizer/_register_cli.py new file mode 100644 index 0000000000..3979c8c25a --- /dev/null +++ b/src/sisl_toolbox/siesta/minimizer/_register_cli.py @@ -0,0 +1,39 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. +import argparse + +from sisl._lib._argparse import get_argparse_parser + +# Import object holding all the CLI +from sisl_toolbox.cli import register_toolbox_cli + +from .basis_optimization import optimize_basis, write_basis_to_yaml + + +def basis_cli(subp=None, parser_kwargs={}): + """Argparse CLI for the basis optimization utilities.""" + + # Create main parser + title = "Basis optimization utilities" + is_sub = not subp is None + if is_sub: + p = subp.add_parser("basis", description=title, help=title, **parser_kwargs) + else: + p = argparse.ArgumentParser(title, description=title, **parser_kwargs) + + # If the main parser is executed, just print the help + p.set_defaults(runner=lambda args: p.print_help()) + + # Add the subparsers for the commands + subp = p.add_subparsers(title="Commands") + + get_argparse_parser( + optimize_basis, name="optim", subp=subp, parser_kwargs=parser_kwargs + ) + get_argparse_parser( + write_basis_to_yaml, name="build", subp=subp, parser_kwargs=parser_kwargs + ) + + +register_toolbox_cli(basis_cli) diff --git a/src/sisl_toolbox/siesta/minimizer/basis_optimization.py b/src/sisl_toolbox/siesta/minimizer/basis_optimization.py index 7c9e18f452..1d65dd61b7 100644 --- a/src/sisl_toolbox/siesta/minimizer/basis_optimization.py +++ b/src/sisl_toolbox/siesta/minimizer/basis_optimization.py @@ -5,7 +5,7 @@ import logging import subprocess from pathlib import Path -from typing import Dict, List, Optional, Tuple, Union +from typing import Dict, List, Literal, Optional, Tuple, Union import numpy as np import yaml as yaml_module @@ -725,8 +725,8 @@ def optimize_basis( siesta_cmd: str = "siesta", out: str = "basisoptim.dat", out_fmt: str = "20.17e", - logging_level: str = "notset", # Literal["critical", "notset", "info", "debug"] - optimizer: str = "bads", + logging_level: Literal["critical", "notset", "info", "debug"] = "notset", + optimizer: Literal["bads", "swarms", "local"] = "bads", optimizer_kwargs: dict = {}, ): """Optimizes a basis set for a given geometry. @@ -765,11 +765,11 @@ def optimize_basis( Parameters ---------- - geometry : str + geometry : Path to the geometry file for which we want to generate the basis configuration. This fdf file might include other parameters to use when running SIESTA. It can't be named RUN.fdf, though. - size : str, optional + size : Specification of basis size. "XZ(P)" where X is the number of zetas and P is whether to polarize or not. @@ -777,47 +777,48 @@ def optimize_basis( It can also be "fdf" or "basis" to use the basis size as read from the `initial_basis` argument. This forces the `initial_basis` argument to be equal to the `size` argument. - optimize_pol : bool, optional + optimize_pol : If the basis contains polarization orbitals, whether they should be explicitly optimized or not. If not, the default polarization orbitals will be requested. - variable_delta : float, optional + variable_delta : Delta that specifies the step size in changes to the optimized variables. It is used differently depending on the optimizer. - basis_spec : dict, optional + basis_spec : An already built basis specification dictionary. If provided, it will be used instead of created from the previous arguments. You can use ``write_yaml`` to generate a template, and then tailor it to your needs. - start : str, optional + start : At which step to start optimizing. Previous steps will be read from previous runs. Can be "z{x}" or "pol" where x is the zeta shell number. - stop : str, optional + stop : At which step to stop optimizing. Can be "z{x}" or "pol" where x is the zeta shell number. The optimization is stopped AFTER optimizing this step. - tol_fun : float, optional + tol_fun : Tolerance for the optimization function value. If the function value changes less than this value after some iterations, the optimization is stopped. - tol_stall_iters : int, optional + tol_stall_iters : Number of iterations that the function value must not change more than tol_fun to consider the optimization ended. Note that one iteration in BADS consists of multiple function evaluations. - siesta_cmd : str, optional + siesta_cmd : Shell command to run siesta. - out : str, optional + out : Path to the file where to write the points evaluated by the optimizer. For each step, a file will be generated prefixed with the step name: "{step}_{out}. - out_fmt : str, optional + out_fmt : Format string to use when writing the points evaluated by the optimizer. - logging_level : str, optional - Logging level to use. Can be "critical", "notset", "info" or "debug". - optimizer : str, optional + logging_level : + Logging level to use. + optimizer : Which optimizer to use. Can be: + - "bads": Uses pybads to run the Bayesian Adaptive Direct Search (BADS) algorithm for optimization. When benchmarking algorithms, BADS has proven to be the best at finding the global minimum, this is why it is the default. - "swarms": Uses pyswarms to run a Particle Swarms optimization. - "local": Uses the scipy.optimize.minimize function. - optimizer_kwargs : dict, optional + optimizer_kwargs : Keyword arguments to pass to the optimizer. """ @@ -1015,156 +1016,3 @@ def get_hard_bounds(bounds): if optim_iter.get("stop", False): break - - -def basis_cli(subp=None, parser_kwargs={}): - """Argparse CLI for the basis optimization utilities.""" - - # Create main parser - title = "Basis optimization utilities" - is_sub = not subp is None - if is_sub: - p = subp.add_parser("basis", description=title, help=title, **parser_kwargs) - else: - p = argparse.ArgumentParser(title, description=title, **parser_kwargs) - - # If the main parser is executed, just print the help - p.set_defaults(runner=lambda args: p.print_help()) - - # Add the subparsers for the commands - subp = p.add_subparsers(title="Commands") - - get_argparse_parser( - optimize_basis, name="optim", subp=subp, parser_kwargs=parser_kwargs - ) - get_argparse_parser( - write_basis_to_yaml, name="build", subp=subp, parser_kwargs=parser_kwargs - ) - - -# Import object holding all the CLI -from sisl_toolbox.cli import register_toolbox_cli - -register_toolbox_cli(basis_cli) - -# def get_typer_app(): -# """Returns a typer app to use as a CLI for basis optimization.""" -# import typer - -# def yaml_dict(d: str): - -# if isinstance(d, dict): -# return d - -# return yaml_module.safe_load(d) - -# # Helper function to annotate a function with the help from the docstring. -# # In this way, typer can display the help on the command line. -# def _annotate_func(func): -# import inspect -# from copy import copy - -# from typing_extensions import Annotated - -# params_help = {} - -# in_parameters = False -# read_key = None -# arg_content = "" - -# for line in func.__doc__.split("\n"): -# if "Parameters" in line: -# in_parameters = True -# space = line.find("Parameters") -# continue - -# if in_parameters: -# if len(line) < space + 1: -# continue -# if len(line) > 1 and line[0] != " ": -# break - -# if line[space] not in (" ", "-"): -# if read_key is not None: -# params_help[read_key] = arg_content - -# read_key = line.split(":")[0].strip() -# arg_content = "" -# else: -# if arg_content == "": -# arg_content = line.strip().capitalize() -# else: -# arg_content += " " + line.strip() - -# if line.startswith("------"): -# break - -# if read_key is not None: -# params_help[read_key] = arg_content - -# @functools.wraps(func) -# def wrapper(*args, config: str = "", **kwargs): -# return func(*args, **kwargs) - -# def config_callback(ctx, param, param_value): - -# if param_value != "": -# with open(param_value, "r") as f: -# yaml_config = yaml_module.safe_load(f) - -# ctx.default_map = ctx.default_map or {} -# ctx.default_map.update(yaml_config) - -# sig = inspect.signature(func) - -# new_parameters = [] -# for param in sig.parameters.values(): - -# parser = None -# if param.annotation == dict: -# parser = yaml_dict - -# typer_arg_cls = ( -# typer.Argument -# if param.default == inspect.Parameter.empty -# else typer.Option -# ) -# new_parameters.append( -# param.replace( -# annotation=Annotated[ -# param.annotation, -# typer_arg_cls(help=params_help.get(param.name), parser=parser), -# ] -# ) -# ) - -# new_parameters.append( -# inspect.Parameter( -# "config", -# default="", -# kind=inspect.Parameter.KEYWORD_ONLY, -# annotation=Annotated[ -# str, -# typer.Option( -# "--config", -# "-c", -# help="YAML file to load the configuration from.", -# is_eager=True, -# callback=config_callback, -# ), -# ], -# ) -# ) - -# wrapper.__signature__ = sig.replace(parameters=new_parameters) -# wrapper.__doc__ = func.__doc__[: func.__doc__.find("Parameters\n")] - -# return wrapper - -# # Create the app. -# app = typer.Typer(name="Basis optimization utilities", rich_markup_mode="markdown") - -# app.command("write-yaml")(_annotate_func(write_basis_to_yaml)) -# app.command("optimize")(_annotate_func(optimize_basis)) - -# return app From 5e99196b22d81b66c972a3ecb39f3744572899c3 Mon Sep 17 00:00:00 2001 From: Pol Febrer Date: Tue, 15 Apr 2025 15:14:18 +0200 Subject: [PATCH 3/9] Document that enthalpy is minimized --- .../siesta/minimizer/basis_optimization.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/sisl_toolbox/siesta/minimizer/basis_optimization.py b/src/sisl_toolbox/siesta/minimizer/basis_optimization.py index 1d65dd61b7..d9a218cd28 100644 --- a/src/sisl_toolbox/siesta/minimizer/basis_optimization.py +++ b/src/sisl_toolbox/siesta/minimizer/basis_optimization.py @@ -731,7 +731,13 @@ def optimize_basis( ): """Optimizes a basis set for a given geometry. - The optimization follows the below sequence: + The optimization minimizes basis enthalpy. The formula for + basis enthalpy is `H = E + pV`, where E is the total energy, + V is the volume of the basis and p is a "pressure" parameter). + The pressure must be set by the user in the fdf file through + the `BasisPressure` input. + + The procedure follows this sequence: - First it optimizes the cutoff radii for the first zeta shell. @@ -767,8 +773,12 @@ def optimize_basis( ---------- geometry : Path to the geometry file for which we want to generate the basis configuration. - This fdf file might include other parameters to use when running SIESTA. It can't be - named RUN.fdf, though. + This fdf file might include other parameters to use when running SIESTA. + A sensible parameter to include in this fdf file is `BasisPressure`, which + controls how much a big basis is penalized in the basis enthalpy metric + (the higher the value, the more penalized). + + This file can't be named RUN.fdf. size : Specification of basis size. "XZ(P)" where X is the number of zetas and P is whether to polarize or not. From fd4469e0c852491c4fe41048ae8b6af4bc481bec Mon Sep 17 00:00:00 2001 From: Pol Febrer Date: Tue, 15 Apr 2025 17:37:22 +0200 Subject: [PATCH 4/9] Removed unused imports --- src/sisl_toolbox/siesta/minimizer/basis_optimization.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/sisl_toolbox/siesta/minimizer/basis_optimization.py b/src/sisl_toolbox/siesta/minimizer/basis_optimization.py index d9a218cd28..d16009147b 100644 --- a/src/sisl_toolbox/siesta/minimizer/basis_optimization.py +++ b/src/sisl_toolbox/siesta/minimizer/basis_optimization.py @@ -1,7 +1,6 @@ # This Source Code Form is subject to the terms of the Mozilla Public # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at https://mozilla.org/MPL/2.0/. -import argparse import logging import subprocess from pathlib import Path @@ -11,7 +10,6 @@ import yaml as yaml_module import sisl -from sisl._lib._argparse import get_argparse_parser from sisl.messages import warn from sisl_toolbox.siesta.minimizer import ( AtomBasis, @@ -23,7 +21,12 @@ SiestaRunner, ) from sisl_toolbox.siesta.minimizer._minimize import _log -from sisl_toolbox.siesta.minimizer._yaml_reader import read_yaml + +__all__ = [ + "RhoReuseSiestaRunner", + "write_basis_to_yaml", + "optimize_basis", +] class RhoReuseSiestaRunner(SiestaRunner): From be064ddcab55aad4135c122036be2a9819f8b5de Mon Sep 17 00:00:00 2001 From: Pol Febrer Date: Tue, 15 Apr 2025 18:58:49 +0200 Subject: [PATCH 5/9] Added documentation --- docs/toolbox/basis/basis.rst | 93 +++++++++++++++++++ docs/toolbox/index.rst | 4 +- docs/toolbox/siesta/minimizer.rst | 33 +++++++ .../siesta/minimizer/_minimize_siesta.py | 2 + 4 files changed, 131 insertions(+), 1 deletion(-) create mode 100644 docs/toolbox/basis/basis.rst create mode 100644 docs/toolbox/siesta/minimizer.rst diff --git a/docs/toolbox/basis/basis.rst b/docs/toolbox/basis/basis.rst new file mode 100644 index 0000000000..e9e513616e --- /dev/null +++ b/docs/toolbox/basis/basis.rst @@ -0,0 +1,93 @@ +Basis optimization +================== + +.. currentmodule:: sisl_toolbox.siesta.minimizer + +Optimizing a basis set for SIESTA is a cumbersome task. +The goal of this toolbox is to allow users to **optimize a basis set with just one CLI command.** + +The commands and their options can be accessed like: + +.. code-block:: bash + + stoolbox basis --help + +In summary, whenever one wants to optimize a basis set for a given system, +the first step is to create a directory with the input files to run the +calculation. This directory should contain, as usual: + +- The ``.fdf`` files with all the input parameters for the calculation. +- The pseudopotentials (``.psf`` or ``.psml`` files). + +Then, one can directly run the optimization: + +.. code-block:: bash + + stoolbox basis optim --geometry input.fdf + +with ``input.fdf`` being the input file for the calculation. This will use all the default +values. Since there are many possible tweaks, we invite you to read carefully the help +message from the CLI. Here we will just mention some important things that could go unnoticed. + +**Basis enthalpy:** The quantity that is minimized is the basis enthalpy. This is :math:`H = E + pV` +with :math:`E` being the energy of the system, :math:`V` the volume of the basis and :math:`p` a "pressure" that +is defined in the fdf file with the `BasisPressure` table. This "pressure" penalizes bigger +basis, which result in more expensive calculations. It is the responsibility of the user to +set this value. As a rule of thumb, we recommend to set it to `0.02 GPa` for the first two +rows of the periodic table and `0.2 GPa` for the rest. + +**The SIESTA command:** There is a ``--siesta-cmd`` option to specify the way of executing SIESTA. By default, it +is simply calling the ``siesta`` command, but you could set it for example to ``mpirun -n 4 siesta`` +so that SIESTA is ran in parallel. + +There is no problem with using this CLI in clusters inside a submitted job, for example. + +**A custom basis definition:** It may happen that the conventional optimizable parameters as well as their lower and +upper bounds are not good for your case (e.g. you would like the upper bound for a cutoff +radius to be higher). In that case, you can create a custom ``--basis-spec``. The best way +to do it is by calling + +.. code-block:: bash + + stoolbox basis build --geometry input.fdf + +which will generate a yaml file with a basis specification that you can tweak manually. +Then, you can pass it directly to the optimization using the ``--config`` option: + +.. code-block:: bash + + stoolbox basis optim --geometry input.fdf --config my_config.yaml + +**Installing the optimizers:** The default optimizer is BADS (https://github.com/acerbilab/bads) +which is the one that we have found works best to optimize basis sets. The optimizer is however +not installed by default. You can install it using pip: + +.. code-block:: bash + + pip install pybads + +and the same would apply for other optimizers that you may want to use. + +**Output:** The output that appears on the terminal is left to the particular optimizer. +However, sisl generates `.dat` files which contain information about each SIESTA execution. +These files contain one column for each variable being optimized and one column for the +metric to minimize. + + +Python API +---------- + +The functions that do the work are also usable in python code by importing them: + +.. code-block:: python + + from sisl_toolbox.siesta.minimizer import optimize_basis, write_basis_to_yaml + +Here is their documentation: + +.. autosummary:: + :toctree: generated/ + :nosignatures: + + optimize_basis + write_basis_to_yaml diff --git a/docs/toolbox/index.rst b/docs/toolbox/index.rst index e3f225dbfb..7221951736 100644 --- a/docs/toolbox/index.rst +++ b/docs/toolbox/index.rst @@ -22,10 +22,12 @@ Toolboxes should be imported directly. The implemented toolboxes are listed here: - + .. toctree:: :maxdepth: 1 transiesta/ts_fft siesta/atom_plot btd/btd + siesta/minimizer + basis/basis diff --git a/docs/toolbox/siesta/minimizer.rst b/docs/toolbox/siesta/minimizer.rst new file mode 100644 index 0000000000..aebdec4395 --- /dev/null +++ b/docs/toolbox/siesta/minimizer.rst @@ -0,0 +1,33 @@ +Minimizers +================= + +.. currentmodule:: sisl_toolbox.siesta.minimizer + +In `sisl_toolbox.siesta.minimizer` there is a collection of minimizers +that given some `variables`, a `runner` and a `metric` optimizes the +variables to minimize the metric. + +These are the minimizer classes implemented: + +.. autosummary:: + :toctree: generated/ + :nosignatures: + + BaseMinimize + LocalMinimize + DualAnnealingMinimize + BADSMinimize + ParticleSwarmsMinimize + +For each of them, there is a subclass particularly tailored to optimize +SIESTA runs: + +.. autosummary:: + :toctree: generated/ + :nosignatures: + + MinimizeSiesta + LocalMinimizeSiesta + DualAnnealingMinimizeSiesta + BADSMinimizeSiesta + ParticleSwarmsMinimizeSiesta diff --git a/src/sisl_toolbox/siesta/minimizer/_minimize_siesta.py b/src/sisl_toolbox/siesta/minimizer/_minimize_siesta.py index 83a5868922..fc830a9b73 100644 --- a/src/sisl_toolbox/siesta/minimizer/_minimize_siesta.py +++ b/src/sisl_toolbox/siesta/minimizer/_minimize_siesta.py @@ -230,9 +230,11 @@ class DualAnnealingMinimizeSiesta(DualAnnealingMinimize, MinimizeSiesta): pass +@set_module("sisl_toolbox.siesta.minimizer") class BADSMinimizeSiesta(BADSMinimize, MinimizeSiesta): pass +@set_module("sisl_toolbox.siesta.minimizer") class ParticleSwarmsMinimizeSiesta(ParticleSwarmsMinimize, MinimizeSiesta): pass From cb85646f98300f3b0f67a1cb5113bf925ac43c3f Mon Sep 17 00:00:00 2001 From: Pol Febrer Date: Tue, 15 Apr 2025 19:00:32 +0200 Subject: [PATCH 6/9] typos --- docs/toolbox/basis/basis.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/toolbox/basis/basis.rst b/docs/toolbox/basis/basis.rst index e9e513616e..a7d8f9ad5e 100644 --- a/docs/toolbox/basis/basis.rst +++ b/docs/toolbox/basis/basis.rst @@ -31,10 +31,10 @@ message from the CLI. Here we will just mention some important things that could **Basis enthalpy:** The quantity that is minimized is the basis enthalpy. This is :math:`H = E + pV` with :math:`E` being the energy of the system, :math:`V` the volume of the basis and :math:`p` a "pressure" that -is defined in the fdf file with the `BasisPressure` table. This "pressure" penalizes bigger +is defined in the fdf file with the ```BasisPressure`` table. This "pressure" penalizes bigger basis, which result in more expensive calculations. It is the responsibility of the user to -set this value. As a rule of thumb, we recommend to set it to `0.02 GPa` for the first two -rows of the periodic table and `0.2 GPa` for the rest. +set this value. As a rule of thumb, we recommend to set it to ``0.02 GPa`` for the first two +rows of the periodic table and ``0.2 GPa`` for the rest. **The SIESTA command:** There is a ``--siesta-cmd`` option to specify the way of executing SIESTA. By default, it is simply calling the ``siesta`` command, but you could set it for example to ``mpirun -n 4 siesta`` @@ -69,7 +69,7 @@ not installed by default. You can install it using pip: and the same would apply for other optimizers that you may want to use. **Output:** The output that appears on the terminal is left to the particular optimizer. -However, sisl generates `.dat` files which contain information about each SIESTA execution. +However, sisl generates ``.dat`` files which contain information about each SIESTA execution. These files contain one column for each variable being optimized and one column for the metric to minimize. From 65de7d6a8bb8d5532a2542720bb7c59948cb56c6 Mon Sep 17 00:00:00 2001 From: Nick Papior Date: Thu, 19 Jun 2025 22:03:11 +0200 Subject: [PATCH 7/9] added alias for optim Signed-off-by: Nick Papior --- src/sisl_toolbox/siesta/atom/_register_cli.py | 1 + src/sisl_toolbox/siesta/minimizer/_register_cli.py | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/sisl_toolbox/siesta/atom/_register_cli.py b/src/sisl_toolbox/siesta/atom/_register_cli.py index a5ee737328..786dfbf402 100644 --- a/src/sisl_toolbox/siesta/atom/_register_cli.py +++ b/src/sisl_toolbox/siesta/atom/_register_cli.py @@ -9,6 +9,7 @@ from ._atom import atom_plot +# TODO once 3.10 hits, use `sys.orig_argv[0]` which is non-writeable _script = Path(sys.argv[0]).name diff --git a/src/sisl_toolbox/siesta/minimizer/_register_cli.py b/src/sisl_toolbox/siesta/minimizer/_register_cli.py index 3979c8c25a..c749cfb1ab 100644 --- a/src/sisl_toolbox/siesta/minimizer/_register_cli.py +++ b/src/sisl_toolbox/siesta/minimizer/_register_cli.py @@ -29,10 +29,11 @@ def basis_cli(subp=None, parser_kwargs={}): subp = p.add_subparsers(title="Commands") get_argparse_parser( - optimize_basis, name="optim", subp=subp, parser_kwargs=parser_kwargs + write_basis_to_yaml, name="build", subp=subp, parser_kwargs=parser_kwargs ) + parser_kwargs["aliases"] = ("optim",) get_argparse_parser( - write_basis_to_yaml, name="build", subp=subp, parser_kwargs=parser_kwargs + optimize_basis, name="optimize", subp=subp, parser_kwargs=parser_kwargs ) From d900425faa2f876a476859876db020d569dcecb9 Mon Sep 17 00:00:00 2001 From: Nick Papior Date: Thu, 19 Jun 2025 22:11:49 +0200 Subject: [PATCH 8/9] added pyyaml as dependency Signed-off-by: Nick Papior --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index eea2e1c801..ed05485487 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,6 +51,7 @@ keywords = [ # Now all dependencies that were released around the same time should be the # lower bound of dependencies. dependencies = [ + "pyyaml", # We need npt.NDArray "numpy>=1.21", "scipy>=1.6", From ec336e9bc153e74c28ad2e10e5929e0434c9aadb Mon Sep 17 00:00:00 2001 From: Nick Papior Date: Fri, 20 Jun 2025 15:27:36 +0200 Subject: [PATCH 9/9] fixed and enhanced basis-optimizer Added needed dependency for pyyaml. While strictly not necessary, I think this is an ok thing. It is very much used elsewhere, and we might require it for other things. Made an alias for stoolbox basis optimize|optim. Fixed a thing for return numpy arrays in periodictable. The new CLI registry requires handling of union types, this will fix it by converting unions to str, regardless of what it actually is. Added information when basis information is requested but the entries won't be there due to too small numbers. E.g. soft confinement with V0 = 0 will effectively not do anything. Same for charge confinement for abs(Z) = 0. Added soft-confinement. Made charge-confinement available for all orbitals with 0 charge. Soft-confinement are added to all orbitals, which likely is overkill. It should probably only be done for narrow orbitals. Well... When *guessing* the orbitals I found some errors, at least for gold, it simply omitted the p orbitals, so now there is a warning, and then it will continue silently. It's better to rely on user-defined basis-information. Enabled reading psml files from SIESTA_PS_PATH (not psf!). Signed-off-by: Nick Papior --- docs/conf.py | 1 + docs/quickstart/install.rst | 1 + docs/toolbox/basis/basis.rst | 4 +- src/sisl/_core/periodictable.py | 4 +- src/sisl/_lib/_argparse.py | 12 +- .../siesta/minimizer/_atom_basis.py | 6 + .../siesta/minimizer/_minimize.py | 9 +- .../siesta/minimizer/basis_optimization.py | 325 ++++++++++++------ 8 files changed, 249 insertions(+), 113 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 051b43b319..b6bc97c4e4 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -199,6 +199,7 @@ .. _cmake: https://cmake.org .. _scikit-build-core: https://scikit-build-core.readthedocs.io/en/latest/ .. _netcdf4-py: https://github.com/Unidata/netcdf4-python +.. _pyyaml: https://pyyaml.org/wiki/PyYAMLDocumentation .. _numpy: https://www.numpy.org/ .. _scipy: https://docs.scipy.org/doc/scipy .. _pyparsing: https://github.com/pyparsing/pyparsing diff --git a/docs/quickstart/install.rst b/docs/quickstart/install.rst index e935c87095..27b1351bae 100644 --- a/docs/quickstart/install.rst +++ b/docs/quickstart/install.rst @@ -75,6 +75,7 @@ to run sisl, so generally one shouldn't worry about getting correct packages etc. Here the more detailed requirements are listed. - `Python`_ 3.9 or above +- `pyyaml`_ - `numpy`_ - `scipy`_ - `xarray`_ diff --git a/docs/toolbox/basis/basis.rst b/docs/toolbox/basis/basis.rst index a7d8f9ad5e..eb03064961 100644 --- a/docs/toolbox/basis/basis.rst +++ b/docs/toolbox/basis/basis.rst @@ -23,7 +23,7 @@ Then, one can directly run the optimization: .. code-block:: bash - stoolbox basis optim --geometry input.fdf + stoolbox basis optimize --geometry input.fdf with ``input.fdf`` being the input file for the calculation. This will use all the default values. Since there are many possible tweaks, we invite you to read carefully the help @@ -56,7 +56,7 @@ Then, you can pass it directly to the optimization using the ``--config`` option .. code-block:: bash - stoolbox basis optim --geometry input.fdf --config my_config.yaml + stoolbox basis optimize --geometry input.fdf --config my_config.yaml **Installing the optimizers:** The default optimizer is BADS (https://github.com/acerbilab/bads) which is the one that we have found works best to optimize basis sets. The optimizer is however diff --git a/src/sisl/_core/periodictable.py b/src/sisl/_core/periodictable.py index 1aa7b1ef27..a14326d19b 100644 --- a/src/sisl/_core/periodictable.py +++ b/src/sisl/_core/periodictable.py @@ -900,7 +900,7 @@ def conv(row, col): return "" if row.ndim == 0: - return conv(row[()], col[()]) + return np.asarray(conv(row, col), dtype=str) return np.asarray(list(map(conv, row, col)), dtype=str) @classmethod @@ -939,7 +939,7 @@ def conv(Z): return -1 if Z.ndim == 0: - return conv(Z[()]) + return np.asarray(conv(Z[()]), dtype=int) return np.asarray(list(map(conv, Z)), dtype=int) @classmethod diff --git a/src/sisl/_lib/_argparse.py b/src/sisl/_lib/_argparse.py index 0c860459b2..72aa6b6388 100644 --- a/src/sisl/_lib/_argparse.py +++ b/src/sisl/_lib/_argparse.py @@ -19,7 +19,12 @@ def is_optional(field): """Check whether the annotation for a parameter is an Optional type.""" - return typing.get_origin(field) is Union and type(None) in typing.get_args(field) + return is_union(field) and type(None) in typing.get_args(field) + + +def is_union(field): + """Check whether the annotation for a parameter is an Union type.""" + return typing.get_origin(field) is Union def is_literal(field): @@ -202,6 +207,11 @@ def get_argparse_parser( choices = get_literal_args(annotation) annotation = type(choices[0]) + # If the annotation is some-kind of union, just replace + # it with a string... + if is_union(annotation): + annotation = str + group.add_argument( *arg_names, type=annotation, diff --git a/src/sisl_toolbox/siesta/minimizer/_atom_basis.py b/src/sisl_toolbox/siesta/minimizer/_atom_basis.py index 9125cc8a45..ec33065f29 100644 --- a/src/sisl_toolbox/siesta/minimizer/_atom_basis.py +++ b/src/sisl_toolbox/siesta/minimizer/_atom_basis.py @@ -8,6 +8,7 @@ import sisl as si from sisl._internal import set_module +from sisl.messages import info from sisl.utils import NotNonePropertyDict from ._variable import Variable @@ -323,6 +324,7 @@ def basis(self): # get options for this shell opts = self.opts[(n, l)] + line = f"n={n} {l} {len(orbs)}" for key, value in opts.items(): if key == "pol": @@ -342,6 +344,8 @@ def basis(self): # explicit radius, convert to Bohr ri *= _Ang2Bohr line += f" {ri:.10f}" + else: + info("Omitting soft-confinement due to V0 < 0.001 eV") elif key == "split": # split-norm value @@ -363,6 +367,8 @@ def basis(self): line += f" {yukawa/_Ang2Bohr:.10f}" if not width is None: line += f" {width*_Ang2Bohr:.10f}" + else: + info("Omitting charge-confinement due to Z < 0.005 e") else: raise ValueError(f"Unknown option for n={n} l={l}: {key}") block.append(line) diff --git a/src/sisl_toolbox/siesta/minimizer/_minimize.py b/src/sisl_toolbox/siesta/minimizer/_minimize.py index f3f2b2b61b..aacf913d08 100644 --- a/src/sisl_toolbox/siesta/minimizer/_minimize.py +++ b/src/sisl_toolbox/siesta/minimizer/_minimize.py @@ -30,6 +30,9 @@ _log = logging.getLogger(__name__) +# TODO we should have some kind of checks for the bounds, whether they +# make sense or not. + def _convert_optimize_result(minimizer, result): """Convert optimize result to conform to the scaling procedure performed""" @@ -350,15 +353,13 @@ class BADSMinimize(BaseMinimize): It uses the pybads package to perform the minimization. """ - def run(self, options={}, get_hard_bounds: Callable = lambda x: x, **kwargs): + def run(self, options={}, func_hard_bounds: Callable = lambda x: x, **kwargs): from pybads.bads import BADS bounds = self.normalize_bounds() bounds = np.array(bounds) - bounds[bounds == 0] = 0.5 - - hard_bounds = get_hard_bounds(bounds) + hard_bounds = func_hard_bounds(bounds) with self: diff --git a/src/sisl_toolbox/siesta/minimizer/basis_optimization.py b/src/sisl_toolbox/siesta/minimizer/basis_optimization.py index d16009147b..eeb1b34eb2 100644 --- a/src/sisl_toolbox/siesta/minimizer/basis_optimization.py +++ b/src/sisl_toolbox/siesta/minimizer/basis_optimization.py @@ -2,6 +2,7 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at https://mozilla.org/MPL/2.0/. import logging +import os import subprocess from pathlib import Path from typing import Dict, List, Literal, Optional, Tuple, Union @@ -10,6 +11,8 @@ import yaml as yaml_module import sisl +from sisl._core.periodictable import PeriodicTable +from sisl._help import has_module from sisl.messages import warn from sisl_toolbox.siesta.minimizer import ( AtomBasis, @@ -188,18 +191,20 @@ def get_valence_orbs_psf(psf: Union[str, Path]): return shells +BoolOrTupleBool = Union[bool, Tuple[bool]] + + def generate_basis_config( atoms: List[sisl.Atom], basis_size: str = "atoms", - optimize_pol: bool = False, - pol_optimize_screening: bool = False, - pol_optimize_width: bool = False, + optimize_charge_conf: BoolOrTupleBool = False, + optimize_soft_conf: BoolOrTupleBool = False, initial_atoms_basis: bool = False, optimize_atoms_basis: bool = False, cutoff_upper_bound: float = 5.5, variable_delta: float = 0.05, ): - """Generates a basis configuration file to be fed to the optimizer. + r"""Generates a basis configuration file to be fed to the optimizer. Non-polarization orbitals are set to optimize their cutoff radii, while polarization orbitals are set to be optimized through the charge @@ -220,18 +225,20 @@ def generate_basis_config( X can also be S, D, T, Q for 1, 2, 3, 4 zetas respectively. E.g. "DZ". It can also be "atoms", which exactly takes the orbitals present in the atoms object. - optimize_pol : bool, optional - If the basis contains polarization orbitals, whether they should be - explicitly optimized or not. If not, the default polarization orbitals - will be requested. - - The polarization orbitals are optimized through the charge confinement. - pol_optimize_screening : bool, optional - If polarization orbitals are optimized, whether the screening length - of the charge confinement should be optimized or not. - pol_optimize_width : bool, optional - If polarization orbitals are optimized, whether the width of the charge confinement - should be optimized or not. + optimize_charge_conf : bool, tuple of bool, optional + Whether some orbitals will be optimized for charge-confinement. + If true, it will optimize, the charge (Q), the Yukawa screening parameter + :math:`\lambda`, and the singularity regularization parameter :math:`\delta`. + + For multiple arguments, each can be fine-tuned on/off. + + This will only optimize charge-confinement for *empty* orbitals (:math:`q=0`). + optimize_soft_conf : bool, tuple of bool, optional + Whether all orbitals will be optimized for soft-confinement. + If true, it will optimize, the potential (:math:`V_0`), and the inner + radius :math:`r_i`. + + For multiple arguments, each can be fine-tuned on/off. initial_atoms_basis: bool, optional If True, the orbitals that are in the Atoms object are taken as the initial values for basis optimization. @@ -257,11 +264,26 @@ def generate_basis_config( polarization = basis_size[-1] == "P" - optimize_pol = optimize_pol and polarization else: n_zetas = None polarization = False - optimize_pol = False + + def _tuplilize(arg, n: int): + """Convert a string (comma-separated) to a list of entries""" + if isinstance(arg, str): + arg = [bool(o) for o in arg.split(",")] + + if isinstance(arg, bool): + arg = [arg] + + if len(arg) == 1: + arg = arg * n + + assert len(arg) == n + return arg + + optimize_charge_conf = _tuplilize(optimize_charge_conf, 3) + optimize_soft_conf = _tuplilize(optimize_soft_conf, 2) # Helper function to determine lower bounds for cutoff radii. def get_cutoff_lowerbound(orb_name, i_zeta): @@ -269,11 +291,10 @@ def get_cutoff_lowerbound(orb_name, i_zeta): The lower bound depends on which orbital it is and for which zeta the cutoff is. """ - if i_zeta > 0: - if i_zeta == 1: - return 1 - else: - return 0 + if i_zeta == 1: + return 1 + elif i_zeta > 1: + return 0 elif orb_name == "1s": return 2 else: @@ -281,6 +302,10 @@ def get_cutoff_lowerbound(orb_name, i_zeta): def _orbs_from_row_and_block(table_row, atom_block): """To be used if the orbitals to optimize can't be inferred from the pseudopotential.""" + warn( + "The orbitals are pre-filled, but likely they are not correct. " + "Please check the basis before doing excessive optimizations!" + ) if table_row == 0: orbs = ["1s"] elif table_row <= 2: @@ -300,9 +325,75 @@ def _orbs_from_row_and_block(table_row, atom_block): return orbs + def _get_charge_conf(): + r"""Return a dict with charge-confinement optimizations. + + This is based on: + + .. math: + + V_Q(r) = \frac{Ze^{-\lambda r}}{\sqrt{r^2+\delta^2}} + + """ + charge_conf = { + "charge": { + "initial": 3.0, + "bounds": [1.0, 10.0], + "delta": variable_delta, + }, + "yukawa": { + "initial": 0.0, + "bounds": [0.0, 3.0], + "delta": 0.5, + }, + "width": { + "initial": 0.0053, + "bounds": [0.0, 0.5], + "delta": 0.05, + }, + } + for key, optimize in zip(("charge", "yukawa", "width"), optimize_charge_conf): + if not optimize: + del charge_conf[key] + return charge_conf + + def _get_soft_conf(): + r"""Return a dict with soft-confinement optimizations. + + This is based on: + + .. math: + + V(r) = V_0\frac{e^{-\frac{r_c - r_i}{r-r_i}}}{r_c-r} + + """ + soft_conf = { + "ri": { + # Negative numbers means it will be based on *rc* + "initial": -0.9, + "bounds": [-1.0, -0.5], + "delta": 0.08, + }, + "V0": { + "initial": 10.0, + "bounds": [0.0, 1000.0], + "delta": 50.0, + }, + } + for key, optimize in zip(("V0", "ri"), optimize_soft_conf): + if not optimize: + del soft_conf[key] + return soft_conf + + PT = PeriodicTable() + config_dict = {} + # Loop through atoms and generate a basis config for each, adding it to config_dict. for atom in atoms: + table_row = PT.Z_row(abs(atom.Z)) + atom_block = PT.Z_block(abs(atom.Z)) + # TODO check these are equivalent table_row, atom_block = get_row(atom.Z) valence_n = table_row + 1 @@ -312,9 +403,14 @@ def _orbs_from_row_and_block(table_row, atom_block): tag = atom.tag if basis_size == "atoms": orbs = ["dummy"] - elif Path(f"{tag}.psml").exists(): + elif (psml_path := Path(f"{tag}.psml")).exists(): # From the psml pseudopotential - orbs = get_valence_orbs_psml(f"{tag}.psml") + orbs = get_valence_orbs_psml(psml_path) + elif ( + psml_path := Path(os.environ.get("SIESTA_PS_PATH", "")) / f"{tag}.psml" + ).exists(): + # From the psml pseudopotential + orbs = get_valence_orbs_psml(psml_path) else: # If we didn't find any psml file, we just generate the valence orbitals using # the row and block of the atom. @@ -356,12 +452,15 @@ def _orbs_from_row_and_block(table_row, atom_block): } # Read in the initial basis if requested. + + # The provided basis contains the ranges of the orbitals from the `Atoms` + # object passed. provided_basis = {} if initial_atoms_basis or basis_size == "atoms": for orbital in atom.orbitals: - name = orbital.name() - orb_name, zeta = name[:2], int(name.split("Z")[1][0]) + orb_name = f"{orbital.n}{orbital.l}" + zeta = orbital.zeta provided_basis[orb_name, zeta] = orbital.R @@ -387,6 +486,15 @@ def _orbs_from_row_and_block(table_row, atom_block): else: basis[f"zeta{zeta}"] = orbital.R + if abs(orbital.q0) < 1e-8 and any(optimize_charge_conf): + # charge-confinement is typically only suitable for + # 0-charge orbitals. + basis["charge-confinement"] = _get_charge_conf() + + if any(optimize_soft_conf): + # soft-confinement optimization + basis["soft-confinement"] = _get_soft_conf() + if basis_size == "atoms": # We have already set up the basis from the atoms. continue @@ -394,30 +502,34 @@ def _orbs_from_row_and_block(table_row, atom_block): # If we are here, it means that the size of the basis is not determined by the provided # atoms object, that is, basis size is something like DZP. - def get_cutoff_upper_bound(orb_name, i_zeta): - if (orb_name, i_zeta) in provided_basis: - return provided_basis[(orb_name, i_zeta)] * 0.95 - else: + def get_cutoff_upper_bound(orb_name, zeta): + cutoff = provided_basis.get((orb_name, zeta), None) + if cutoff is None: return cutoff_upper_bound + return cutoff * 0.95 # Loop through non-polarization orbitals and add them to the basis. for orb_name in orbs: orb_basis = {} - if polarization and not optimize_pol: + if polarization: if orb_name == orb_to_polarize: - orb_basis["pol"] = 1 + orb_basis["polarization"] = 1 + + if any(optimize_soft_conf): + # soft-confinement optimization + orb_basis["soft-confinement"] = _get_soft_conf() - for i_zeta in range(n_zetas): - orb_id = (orb_name, i_zeta + 1) + for i_zeta in range(1, n_zetas + 1): + orb_id = (orb_name, i_zeta) - orb_basis[f"zeta{i_zeta + 1}"] = { + orb_basis[f"zeta{i_zeta}"] = { "initial": provided_basis.get(orb_id, 3.0), } if orb_id not in provided_basis: # Orbital not provided, we need to optimize it from scratch. - orb_basis[f"zeta{i_zeta + 1}"].update( + orb_basis[f"zeta{i_zeta}"].update( { "bounds": [ get_cutoff_lowerbound(orb_name, i_zeta), @@ -428,7 +540,12 @@ def get_cutoff_upper_bound(orb_name, i_zeta): ) else: # Orbital provided, we don't need to optimize it. - orb_basis[f"zeta{i_zeta + 1}"] = provided_basis[orb_id] + orb_basis[f"zeta{i_zeta}"] = provided_basis[orb_id] + + # In this case the orbitals are based on the valence configuration. + # I.e. all orbitals have charge. In this case it does not + # make sense to add charge-confinement optimizations. + # For this, only the polarization orbital would gain this. element_config[orb_name] = {"basis": orb_basis} @@ -436,50 +553,28 @@ def get_cutoff_upper_bound(orb_name, i_zeta): if polarization: for orb_name in pol_orbs: + pol_orb_basis = {} + # Check if the polarization orbital is already in the provided basis. if (orb_name, 1) in provided_basis: # Orbital provided in the basis - pol_orb_basis = {} - zeta = 1 while (orb_name, zeta) in provided_basis: pol_orb_basis[f"zeta{zeta}"] = provided_basis[(orb_name, zeta)] zeta += 1 - elif optimize_pol: + elif any(optimize_charge_conf): # Orbital not provided in the basis, and we need to optimize it. - pol_orb_basis = { - "polarizes": orb_to_polarize, - "charge-confinement": { - "charge": { - "initial": 3.0, - "bounds": [1.0, 10.0], - "delta": variable_delta, - }, - }, - } - - charge_confinement = pol_orb_basis["charge-confinement"] - - if pol_optimize_screening: - # Screening length (Ang-1) - charge_confinement["yukawa"] = { - "initial": 0.0, - "bounds": [0.0, 3.0], - "delta": 0.5, - } - if pol_optimize_width: - # Width for the potential (Ang) - charge_confinement["width"] = { - "initial": 0.0053, - "bounds": [0.0, 0.5], - "delta": 0.05, - } + pol_orb_basis["polarizes"] = orb_to_polarize else: continue + if any(optimize_charge_conf): + pol_orb_basis["charge-confinement"] = _get_charge_conf() + if any(optimize_soft_conf): + orb_basis["soft-confinement"] = _get_soft_conf() element_config[orb_name] = {"basis": pol_orb_basis} return config_dict @@ -542,7 +637,7 @@ def set_minimizer_variables( # Remove the specification for a default polarized orbital, since we are going # to explicitly include it. - atom_config[polarized_orb]["basis"].pop("pol") + atom_config[polarized_orb]["basis"].pop("polarization") # Loop through all the zetas of the orbital that this one polarizes and # copy the cutoffs. @@ -563,11 +658,10 @@ def set_minimizer_variables( else: # We are not optimizing the polarization orbitals, so we need to tell SIESTA # to use its default polarization orbitals, for the polarized orbital. - atom_config[polarized_orb]["basis"]["pol"] = 1 + atom_config[polarized_orb]["basis"]["polarization"] = 1 # Modify the basis. basis[atom_key] = AtomBasis.from_dict(atom_config) - atom_basis = basis[atom_key] # Now that we have the appropiate basis for our next minimization, gather the @@ -618,7 +712,7 @@ def write_basis(basis: Dict[str, AtomBasis], fdf: Union[str, Path]): Path to the fdf file where to write the basis specification. """ full_basis = [] - for atom_key, atom_basis in basis.items(): + for atom_basis in basis.values(): full_basis.extend(atom_basis.basis()) sisl.io.siesta.fdfSileSiesta(fdf, "w").set("PAO.Basis", full_basis) @@ -627,9 +721,8 @@ def write_basis(basis: Dict[str, AtomBasis], fdf: Union[str, Path]): def write_basis_to_yaml( geometry: str, size: str = "DZP", - optimize_pol: bool = False, - pol_optimize_screening: bool = False, - pol_optimize_width: bool = False, + optimize_charge_conf: BoolOrTupleBool = False, + optimize_soft_conf: BoolOrTupleBool = False, variable_delta: float = 0.05, initial_basis: Optional[str] = None, optimize_initial_basis: bool = False, @@ -641,7 +734,7 @@ def write_basis_to_yaml( The function looks at possible pseudopotential files (first psml, then psf) to determine which orbitals to include in the basis. Otherwise, it just generates - the valence orbitals for the usual conventions, without semicores. + the valence orbitals for the usual conventions, without semi-cores. Parameters ---------- @@ -655,16 +748,20 @@ def write_basis_to_yaml( It can also be "fdf" or "basis" to use the basis size as read from the `initial_basis` argument. This forces the `initial_basis` argument to be equal to the `size` argument. - optimize_pol : bool, optional - If the basis contains polarization orbitals, whether they should be - explicitly optimized or not. If not, the default polarization orbitals - will be requested. - pol_optimize_screening : bool, optional - If polarization orbitals are optimized, whether the screening length - of the charge confinement should be optimized or not. - pol_optimize_width : bool, optional - If polarization orbitals are optimized, whether the width of the charge confinement - should be optimized or not. + optimize_charge_conf : bool, tuple of bool, optional + Whether some orbitals will be optimized for charge-confinement. + If true, it will optimize, the charge (Q), the Yukawa screening parameter + :math:`\lambda`, and the singularity regularization parameter :math:`\delta`. + + For multiple arguments, each can be fine-tuned on/off. + + This will only optimize charge-confinement for *empty* orbitals (:math:`q=0`). + optimize_soft_conf : bool, tuple of bool, optional + Whether all orbitals will be optimized for soft-confinement. + If true, it will optimize, the potential (:math:`V_0`), and the inner + radius :math:`r_i`. + + For multiple arguments, each can be fine-tuned on/off. variable_delta : float, optional Delta that specifies the step size in changes to the optimized variables. It is used differently depending on the optimizer. @@ -698,9 +795,8 @@ def write_basis_to_yaml( basis_config = generate_basis_config( atoms, size, - optimize_pol=optimize_pol, - pol_optimize_screening=pol_optimize_screening, - pol_optimize_width=pol_optimize_width, + optimize_charge_conf=optimize_charge_conf, + optimize_soft_conf=optimize_soft_conf, variable_delta=variable_delta, initial_atoms_basis=initial_basis is not None, optimize_atoms_basis=optimize_initial_basis, @@ -715,21 +811,29 @@ def write_basis_to_yaml( return dumps +DEFAULT_OPTIMIZER: str = "local" +if has_module("pyswarms"): + DEFAULT_OPTIMIZER = "swarms" +if has_module("pybads"): + DEFAULT_OPTIMIZER = "bads" + + def optimize_basis( geometry: str, size: str = "DZP", - optimize_pol: bool = False, + optimize_charge_conf: BoolOrTupleBool = False, + optimize_soft_conf: BoolOrTupleBool = False, variable_delta: float = 0.05, basis_spec: dict = {}, start: str = "z1", # "z{x}" or pol - stop: str = "pol", # "z{x}" or pol + stop: str = "polarization", # "z{x}" or pol tol_fun: float = 1e-3, tol_stall_iters: int = 4, siesta_cmd: str = "siesta", out: str = "basisoptim.dat", out_fmt: str = "20.17e", logging_level: Literal["critical", "notset", "info", "debug"] = "notset", - optimizer: Literal["bads", "swarms", "local"] = "bads", + optimizer: Literal["bads", "swarms", "local"] = DEFAULT_OPTIMIZER, optimizer_kwargs: dict = {}, ): """Optimizes a basis set for a given geometry. @@ -752,7 +856,7 @@ def optimize_basis( - Finally, it optimizes the polarization orbitals (if requested).\n\n - Appropiate optimization bounds are set for each parameter.\n\n + Appropriate optimization bounds are set for each parameter.\n\n At each step, all corresponding parameters are optimized simultaneously.\n\n @@ -766,7 +870,7 @@ def optimize_basis( To determine which orbitals to include in the basis, the function looks at possible pseudopotential files (first psml, then psf). If they are not there, it just generates - the valence orbitals for the usual conventions, without semicores. + the valence orbitals for the usual conventions, without semi-cores. This function only provides the simplest specification for basis shapes. If you need more customization control, first generate a basis specification and then provide it to this @@ -790,10 +894,20 @@ def optimize_basis( It can also be "fdf" or "basis" to use the basis size as read from the `initial_basis` argument. This forces the `initial_basis` argument to be equal to the `size` argument. - optimize_pol : - If the basis contains polarization orbitals, whether they should be - explicitly optimized or not. If not, the default polarization orbitals - will be requested. + optimize_charge_conf : bool, tuple of bool, optional + Whether some orbitals will be optimized for charge-confinement. + If true, it will optimize, the charge (Q), the Yukawa screening parameter + :math:`\lambda`, and the singularity regularization parameter :math:`\delta`. + + For multiple arguments, each can be fine-tuned on/off. + + This will only optimize charge-confinement for *empty* orbitals (:math:`q=0`). + optimize_soft_conf : bool, tuple of bool, optional + Whether all orbitals will be optimized for soft-confinement. + If true, it will optimize, the potential (:math:`V_0`), and the inner + radius :math:`r_i`. + + For multiple arguments, each can be fine-tuned on/off. variable_delta : Delta that specifies the step size in changes to the optimized variables. It is used differently depending on the optimizer. @@ -804,9 +918,9 @@ def optimize_basis( You can use ``write_yaml`` to generate a template, and then tailor it to your needs. start : At which step to start optimizing. Previous steps will be read from previous runs. - Can be "z{x}" or "pol" where x is the zeta shell number. + Can be "z{x}" or "polarization" where x is the zeta shell number. stop : - At which step to stop optimizing. Can be "z{x}" or "pol" where x is the zeta shell number. + At which step to stop optimizing. Can be "z{x}" or "polarization" where x is the zeta shell number. The optimization is stopped AFTER optimizing this step. tol_fun : Tolerance for the optimization function value. If the function value changes less than this @@ -827,10 +941,12 @@ def optimize_basis( Which optimizer to use. Can be: - "bads": Uses pybads to run the Bayesian Adaptive Direct Search (BADS) algorithm - for optimization. When benchmarking algorithms, BADS has proven to be the best at finding the - global minimum, this is why it is the default. + for optimization. When benchmarking algorithms, BADS has proven to be the best at finding the + global minimum, this is why it is the default (if available). - "swarms": Uses pyswarms to run a Particle Swarms optimization. + Is default if importable, and `pybads` is not available. - "local": Uses the scipy.optimize.minimize function. + Is default if neither of `pybads` or `pyswarms` are importable. optimizer_kwargs : Keyword arguments to pass to the optimizer. """ @@ -848,7 +964,8 @@ def optimize_basis( basis_yaml = write_basis_to_yaml( geometry=geometry, size=size, - optimize_pol=optimize_pol, + optimize_charge_conf=optimize_charge_conf, + optimize_soft_conf=optimize_soft_conf, variable_delta=variable_delta, out=None, ) @@ -935,7 +1052,7 @@ def get_hard_bounds(bounds): { "zeta": None, "polarization": True, - "out_prefix": "pol", + "out_prefix": "polarization", "skip": False, "stop": True, "get_hard_bounds": lambda bounds: bounds,