Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,6 @@ neps.run(
pipeline_space=pipeline_space,
root_directory="path/to/save/results", # Replace with the actual path.
evaluations_to_spend=100,
write_summary_to_disk=True,
)

# 4. status information about a neural pipeline search run, using:
Expand Down
21 changes: 1 addition & 20 deletions docs/reference/analyse.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,7 @@ python -m neps.plot ROOT_DIRECTORY
Currently, this creates one plot that shows the best error value across the number of evaluations.

## What's on disk?
In the root directory, NePS maintains several files at all times that are human readable and can be useful
If you pass the `write_summary_to_disk=` argument to [`neps.run()`][neps.api.run],
NePS will generate a summary CSV and TXT files for you.

=== "`neps.run(..., write_summary_to_disk=True)`"
NePS maintains several human-readable files in the `ROOT_DIRECTORY`. Additionally, it generates a summary folder (located at `ROOT_DIRECTORY/summary/`), which contains useful reports on the run.

```
ROOT_DIRECTORY
Expand All @@ -61,21 +57,6 @@ NePS will generate a summary CSV and TXT files for you.
└── optimizer_state.pkl
```


=== "`neps.run(..., write_summary_to_disk=False)`"

```
ROOT_DIRECTORY
├── results
│ └── config_1
│ ├── config.yaml
│ ├── metadata.yaml
│ └── report.yaml
├── optimizer_info.yaml
└── optimizer_state.pkl
```


The `full.csv` contains all configuration details in CSV format.
Details include configuration hyperparameters and any returned result and cost from the `evaluate_pipeline` function.

Expand Down
4 changes: 0 additions & 4 deletions docs/reference/evaluate_pipeline.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ All other values raise a `TypeError` inside NePS.
user_result=result_dict,
pipeline_id=pipeline_id,
root_directory=root_directory,
post_run_summary=True,
)
```

Expand Down Expand Up @@ -122,12 +121,9 @@ neps.save_pipeline_results(
user_result=result,
pipeline_id=args.pipeline_id,
root_directory=Path(args.root_dir),
post_run_summary=False,
)
```

The default value for `post_run_summary` is `True`; if you want to prevent any summary creation, you should explicitly set it to `False` in the arguments.

### 3.3 Why this matters

* No worker idles while your job is in the queue ➜ better throughput.
Expand Down
6 changes: 0 additions & 6 deletions docs/reference/neps_run.md
Original file line number Diff line number Diff line change
Expand Up @@ -149,12 +149,6 @@ provided to [`neps.run()`][neps.api.run].
└── optimizer_state.pkl # The optimizer's state, shared between workers
```

=== "python"

```python
neps.run(..., write_summary_to_disk=True)
```

To capture the results of the optimization process, you can use TensorBoard logging with various utilities that integrate closely with NePS. For more information, please refer to the [analyses page](../reference/analyse.md).

Expand Down
41 changes: 15 additions & 26 deletions neps/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ def run( # noqa: C901, D417, PLR0913
root_directory: str | Path = "neps_results",
overwrite_root_directory: bool = False,
evaluations_to_spend: int | None = None,
write_summary_to_disk: bool = True,
max_evaluations_total: int | None = None,
max_evaluations_per_run: int | None = None,
continue_until_max_evaluation_completed: bool = False,
Expand Down Expand Up @@ -201,9 +200,6 @@ def evaluate_pipeline(some_parameter: float) -> float:
overwrite_root_directory: If true, delete the working directory at the start of
the run. This is, e.g., useful when debugging a evaluate_pipeline function.
write_summary_to_disk: If True, creates a csv and txt files after each worker is done,
holding summary information about the configs and results.
max_evaluations_per_run: Number of evaluations this specific call should do.
??? note "Limitation on Async mode"
Currently, there is no specific number to control number of parallel evaluations running with
Expand Down Expand Up @@ -538,30 +534,26 @@ def __call__(
ignore_errors=ignore_errors,
overwrite_optimization_dir=overwrite_root_directory,
sample_batch_size=sample_batch_size,
write_summary_to_disk=write_summary_to_disk,
worker_id=worker_id,
)

post_run_csv(root_directory)
root_directory = Path(root_directory)
summary_dir = root_directory / "summary"
if not write_summary_to_disk:
trajectory_of_improvements(root_directory)
logger.info(
"The summary folder has been created, which contains csv and txt files with"
"the output of all data in the run (short.csv - only the best; full.csv - "
"all runs; best_config_trajectory.txt for incumbent trajectory; and "
"best_config.txt for final incumbent)."
f"\nYou can find summary folder at: {summary_dir}."
)
trajectory_of_improvements(root_directory)
logger.info(
"The summary folder has been created, which contains csv and txt files with"
"the output of all data in the run (short.csv - only the best; full.csv - "
"all runs; best_config_trajectory.txt for incumbent trajectory; and "
"best_config.txt for final incumbent)."
f"\nYou can find summary folder at: {summary_dir}."
)


def save_pipeline_results(
user_result: dict,
pipeline_id: str,
root_directory: Path,
*,
post_run_summary: bool = True,
) -> None:
"""Persist the outcome of one pipeline evaluation.
Expand All @@ -575,8 +567,6 @@ def save_pipeline_results(
neps.core.trial.Trial object inside the optimisation state.
root_directory (Path): Root directory of the NePS run (contains
optimizer_info.yaml and configs/ folder).
post_run_summary (bool, optional): If True, creates a CSV file after
trial completion, holding summary info about configs and results.
"""
_save_results(
Expand All @@ -585,14 +575,13 @@ def save_pipeline_results(
root_directory=root_directory,
)

if post_run_summary:
full_frame_path, short_path = post_run_csv(root_directory)
logger.info(
"The post run summary has been created, which is a csv file with the "
"output of all data in the run."
f"\nYou can find a full dataframe at: {full_frame_path}."
f"\nYou can find a quick summary at: {short_path}."
)
full_frame_path, short_path = post_run_csv(root_directory)
logger.info(
"The post run summary has been created, which is a csv file with the "
"output of all data in the run."
f"\nYou can find a full dataframe at: {full_frame_path}."
f"\nYou can find a quick summary at: {short_path}."
)


def import_trials(
Expand Down
125 changes: 60 additions & 65 deletions neps/runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -521,31 +521,30 @@ def run(self) -> None: # noqa: C901, PLR0912, PLR0915
_set_workers_neps_state(self.state)

main_dir = Path(self.state.path)
if self.settings.write_summary_to_disk:
full_df_path, short_path, csv_locker = _initiate_summary_csv(main_dir)

# Create empty CSV files
with csv_locker.lock():
full_df_path.parent.mkdir(parents=True, exist_ok=True)
full_df_path.touch(exist_ok=True)
short_path.touch(exist_ok=True)

summary_dir = main_dir / "summary"
summary_dir.mkdir(parents=True, exist_ok=True)

improvement_trace_path = summary_dir / "best_config_trajectory.txt"
improvement_trace_path.touch(exist_ok=True)
best_config_path = summary_dir / "best_config.txt"
best_config_path.touch(exist_ok=True)
_trace_lock = FileLock(".trace.lock")
_trace_lock_path = Path(str(_trace_lock.lock_file))
_trace_lock_path.touch(exist_ok=True)
full_df_path, short_path, csv_locker = _initiate_summary_csv(main_dir)

logger.info(
"Summary files can be found in the “summary” folder inside"
"the root directory: %s",
summary_dir,
)
# Create empty CSV files
with csv_locker.lock():
full_df_path.parent.mkdir(parents=True, exist_ok=True)
full_df_path.touch(exist_ok=True)
short_path.touch(exist_ok=True)

summary_dir = main_dir / "summary"
summary_dir.mkdir(parents=True, exist_ok=True)

improvement_trace_path = summary_dir / "best_config_trajectory.txt"
improvement_trace_path.touch(exist_ok=True)
best_config_path = summary_dir / "best_config.txt"
best_config_path.touch(exist_ok=True)
_trace_lock = FileLock(".trace.lock")
_trace_lock_path = Path(str(_trace_lock.lock_file))
_trace_lock_path.touch(exist_ok=True)

logger.info(
"Summary files can be found in the “summary” folder inside"
"the root directory: %s",
summary_dir,
)

previous_trials = self.state.lock_and_read_trials()
if len(previous_trials):
Expand Down Expand Up @@ -720,50 +719,48 @@ def run(self) -> None: # noqa: C901, PLR0912, PLR0915
self.state.new_score,
)

if self.settings.write_summary_to_disk:
# Store in memory for later file re-writing
self.state.all_best_configs.append(
{
"score": self.state.new_score,
"trial_id": evaluated_trial.id,
"config": evaluated_trial.config,
}
)
# Store in memory for later file re-writing
self.state.all_best_configs.append(
{
"score": self.state.new_score,
"trial_id": evaluated_trial.id,
"config": evaluated_trial.config,
}
)

# Build trace text and best config text
trace_text = (
"Best configs and their objectives across evaluations:\n"
+ "-" * 80
+ "\n"
)
for best in self.state.all_best_configs:
trace_text += (
f"Objective to minimize: {best['score']}\n"
f"Config ID: {best['trial_id']}\n"
f"Config: {best['config']}\n" + "-" * 80 + "\n"
)

best_config = self.state.all_best_configs[-1] # Latest best
best_config_text = (
f"# Best config:"
f"\n\n Config ID: {best_config['trial_id']}"
f"\n Objective to minimize: {best_config['score']}"
f"\n Config: {best_config['config']}"
# Build trace text and best config text
trace_text = (
"Best configs and their objectives across evaluations:\n"
+ "-" * 80
+ "\n"
)
for best in self.state.all_best_configs:
trace_text += (
f"Objective to minimize: {best['score']}\n"
f"Config ID: {best['trial_id']}\n"
f"Config: {best['config']}\n" + "-" * 80 + "\n"
)

# Write files from scratch
with _trace_lock:
with improvement_trace_path.open(mode="w") as f:
f.write(trace_text)
best_config = self.state.all_best_configs[-1] # Latest best
best_config_text = (
f"# Best config:"
f"\n\n Config ID: {best_config['trial_id']}"
f"\n Objective to minimize: {best_config['score']}"
f"\n Config: {best_config['config']}"
)

# Write files from scratch
with _trace_lock:
with improvement_trace_path.open(mode="w") as f:
f.write(trace_text)

with best_config_path.open(mode="w") as f:
f.write(best_config_text)
with best_config_path.open(mode="w") as f:
f.write(best_config_text)

if self.settings.write_summary_to_disk:
full_df, short = status(main_dir)
with csv_locker.lock():
full_df.to_csv(full_df_path)
short.to_frame().to_csv(short_path)
full_df, short = status(main_dir)
with csv_locker.lock():
full_df.to_csv(full_df_path)
short.to_frame().to_csv(short_path)

logger.debug("Config %s: %s", evaluated_trial.id, evaluated_trial.config)
logger.debug("Loss %s: %s", evaluated_trial.id, report.objective_to_minimize)
Expand Down Expand Up @@ -965,7 +962,6 @@ def _launch_runtime( # noqa: PLR0913
fidelities_to_spend: int | float | None,
max_evaluations_for_worker: int | None,
sample_batch_size: int | None,
write_summary_to_disk: bool = True,
worker_id: str | None = None,
) -> None:
default_report_values = _make_default_report_values(
Expand Down Expand Up @@ -1043,7 +1039,6 @@ def _launch_runtime( # noqa: PLR0913
max_wallclock_time_for_worker_seconds=None, # TODO: User can't specify yet
max_evaluation_time_for_worker_seconds=None, # TODO: User can't specify yet
max_cost_for_worker=None, # TODO: User can't specify yet
write_summary_to_disk=write_summary_to_disk,
)

# HACK: Due to nfs file-systems, locking with the default `flock()` is not reliable.
Expand Down
5 changes: 0 additions & 5 deletions neps/state/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,8 +188,3 @@ class WorkerSettings:
If `None`, there is no limit and this worker will continue to evaluate
indefinitely or until another stopping criterion is met.
"""

write_summary_to_disk: bool = True
"""If True, creates a csv and txt files after each worker is done,
holding summary information about the configs and results.
"""
2 changes: 1 addition & 1 deletion neps_examples/basic_usage/hyperparameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,6 @@ def evaluate_pipeline(float1, float2, categorical, integer1, integer2):
evaluate_pipeline=evaluate_pipeline,
pipeline_space=pipeline_space,
root_directory="results/hyperparameters_example",
evaluations_to_spend=30,
evaluations_to_spend=15,
worker_id=f"worker_1-{socket.gethostname()}-{os.getpid()}",
)
Loading