Source code for scripts.training.optuna_hpo_support

"""Reusable Optuna helpers for repository-owned neural HPO studies."""

from __future__ import annotations

# Import Python Utilities
from copy import deepcopy
from pathlib import Path
from typing import Any
import json

# Import Optuna Utilities
import optuna

# Import YAML Utilities
import yaml


def load_yaml_dictionary(input_path: Path) -> dict[str, Any]:
    """Read one YAML file and return its top-level dictionary.

    Args:
        input_path: Location of the YAML file; it is opened as UTF-8 text.

    Returns:
        The document parsed with ``yaml.safe_load``.

    Raises:
        AssertionError: If the parsed document is not a dictionary.
    """
    with input_path.open("r", encoding="utf-8") as yaml_stream:
        parsed_document = yaml.safe_load(yaml_stream)

    # NOTE: this is an ``assert``, so the check only runs when Python
    # assertions are enabled (i.e. not under ``python -O``).
    is_dictionary = isinstance(parsed_document, dict)
    assert is_dictionary, f"YAML file must contain a dictionary | {input_path}"
    return parsed_document
def save_yaml_dictionary(payload: dict[str, Any], output_path: Path) -> None:
    """Write one dictionary to a YAML file on disk.

    Missing parent directories of ``output_path`` are created first. The file
    is written as UTF-8 text and keys are not sorted (``sort_keys=False``).

    Args:
        payload: Dictionary to serialize with ``yaml.safe_dump``.
        output_path: Destination file path.
    """
    target_directory = output_path.parent
    target_directory.mkdir(parents=True, exist_ok=True)

    with output_path.open("w", encoding="utf-8") as yaml_stream:
        yaml.safe_dump(payload, yaml_stream, sort_keys=False)
def clone_dictionary(payload: dict[str, Any]) -> dict[str, Any]:
    """Return a deep copy of one nested dictionary payload.

    Args:
        payload: Dictionary to duplicate; it is not modified.

    Returns:
        An independent copy produced by ``copy.deepcopy``, so nested
        containers are not shared with ``payload``.
    """
    cloned_payload: dict[str, Any] = deepcopy(payload)
    return cloned_payload
def resolve_nested_dictionary_value(payload: dict[str, Any], dotted_key: str) -> Any:
    """Look up one value inside nested dictionaries via a dotted key.

    The key is split on ``"."`` and each segment descends one level.

    Args:
        payload: Root dictionary to search.
        dotted_key: Path such as ``"outer.inner.leaf"``.

    Returns:
        Whatever object is stored at the end of the path.

    Raises:
        AssertionError: If a segment has to be looked up in a non-dictionary
            value, or if a segment is absent at its level.
    """
    node: Any = payload
    path_segments = dotted_key.split(".")

    for segment in path_segments:
        # Both checks are ``assert`` statements, so they only run when Python
        # assertions are enabled.
        assert isinstance(node, dict), f"Cannot traverse non-dictionary payload | {dotted_key}"
        assert segment in node, f"Missing nested key | {dotted_key}"
        node = node[segment]

    return node
def set_nested_dictionary_value(payload: dict[str, Any], dotted_key: str, value: Any) -> None:
    """Store one value inside nested dictionaries via a dotted key.

    The key is split on ``"."``. Every segment except the last names an
    intermediate dictionary, which is created empty when it is missing. The
    last segment is the key that receives ``value``. ``payload`` is mutated
    in place.

    Args:
        payload: Root dictionary to update.
        dotted_key: Path such as ``"outer.inner.leaf"``.
        value: Object to store at the end of the path.

    Raises:
        AssertionError: If an intermediate segment already holds a
            non-dictionary value.
    """
    # ``str.split`` always yields at least one item, so the unpacking is safe.
    *parent_segments, leaf_segment = dotted_key.split(".")

    container: dict[str, Any] = payload
    for segment in parent_segments:
        child = container.setdefault(segment, {})
        assert isinstance(child, dict), f"Cannot traverse non-dictionary payload | {dotted_key}"
        container = child

    container[leaf_segment] = value
def build_sqlite_storage_url(storage_path: Path) -> str:
    """Build one SQLite storage URL for Optuna from a database file path.

    The parent directory of ``storage_path`` is created if it is missing.
    The database file itself is not created here.

    Args:
        storage_path: Path of the SQLite database file.

    Returns:
        ``"sqlite:///"`` followed by the resolved path in POSIX form.
    """
    parent_directory = storage_path.parent
    parent_directory.mkdir(parents=True, exist_ok=True)

    resolved_posix_path = storage_path.resolve().as_posix()
    return f"sqlite:///{resolved_posix_path}"
def build_optuna_sampler(sampler_dictionary: dict[str, Any]) -> optuna.samplers.BaseSampler:
    """Build one Optuna sampler from a small config dictionary.

    Keys read from ``sampler_dictionary``:
        ``type``: ``"TPESampler"`` (the default) or ``"RandomSampler"``;
            surrounding whitespace is stripped.
        ``seed``: cast to ``int``; defaults to ``42``.
        ``n_startup_trials``: TPE only, cast to ``int``; defaults to ``5``.

    Args:
        sampler_dictionary: Sampler configuration.

    Returns:
        The constructed ``TPESampler`` or ``RandomSampler``.

    Raises:
        ValueError: If ``type`` names any other sampler.
    """
    sampler_type = str(sampler_dictionary.get("type", "TPESampler")).strip()

    # Reject unknown types before reading any other setting.
    if sampler_type not in ("TPESampler", "RandomSampler"):
        raise ValueError(f"Unsupported Optuna sampler type | {sampler_type}")

    seed = int(sampler_dictionary.get("seed", 42))
    if sampler_type == "RandomSampler":
        return optuna.samplers.RandomSampler(seed=seed)

    startup_trial_count = int(sampler_dictionary.get("n_startup_trials", 5))
    return optuna.samplers.TPESampler(
        seed=seed,
        n_startup_trials=startup_trial_count,
    )
def sample_parameter_dictionary(
    trial: optuna.trial.Trial,
    search_space_dictionary: dict[str, dict[str, Any]],
) -> dict[str, Any]:
    """Sample one value per search-space entry from an Optuna trial.

    Each entry maps a dotted config key to a specification dictionary:
        ``type``: ``"float"``, ``"int"`` or ``"categorical"`` (the default).
        ``name``: name passed to Optuna; defaults to the dotted key.
        ``low`` / ``high``: required bounds for ``float`` and ``int``.
        ``log``: optional, converted with ``bool``; defaults to ``False``.
        ``step``: optional; passed through unchanged for ``float``, cast to
            ``int`` with default ``1`` for ``int``.
        ``choices``: required for ``categorical``; converted to a ``list``.

    Args:
        trial: Trial object providing the ``suggest_*`` methods.
        search_space_dictionary: Search-space specification.

    Returns:
        The suggested values, keyed by the dotted config key (not by the
        Optuna parameter name).

    Raises:
        ValueError: If an entry declares an unsupported ``type``.
        KeyError: If a required specification key is missing.
    """
    sampled_values: dict[str, Any] = {}

    for dotted_key, spec in search_space_dictionary.items():
        kind = str(spec.get("type", "categorical")).strip()
        optuna_name = str(spec.get("name", dotted_key)).strip()

        if kind == "float":
            suggestion = trial.suggest_float(
                optuna_name,
                float(spec["low"]),
                float(spec["high"]),
                log=bool(spec.get("log", False)),
                step=spec.get("step"),
            )
        elif kind == "int":
            suggestion = trial.suggest_int(
                optuna_name,
                int(spec["low"]),
                int(spec["high"]),
                log=bool(spec.get("log", False)),
                step=int(spec.get("step", 1)),
            )
        elif kind == "categorical":
            suggestion = trial.suggest_categorical(
                optuna_name,
                list(spec["choices"]),
            )
        else:
            raise ValueError(f"Unsupported Optuna parameter type | {kind} | {dotted_key}")

        sampled_values[dotted_key] = suggestion

    return sampled_values
def apply_sampled_parameter_dictionary(
    base_training_config: dict[str, Any],
    sampled_parameter_dictionary: dict[str, Any],
) -> dict[str, Any]:
    """Overlay sampled parameters onto a copy of the base training config.

    The base config is cloned with ``clone_dictionary`` and each sampled
    value is written into the clone with ``set_nested_dictionary_value``,
    so ``base_training_config`` itself is left untouched.

    Args:
        base_training_config: Template configuration.
        sampled_parameter_dictionary: Mapping of dotted config keys to the
            values that should be stored at those paths.

    Returns:
        The cloned configuration with every sampled value applied.
    """
    trial_training_config = clone_dictionary(base_training_config)

    for target_key, target_value in sampled_parameter_dictionary.items():
        set_nested_dictionary_value(trial_training_config, target_key, target_value)

    return trial_training_config
def build_trial_suffix(trial_number: int) -> str:
    """Build one compact suffix that identifies an Optuna trial.

    Args:
        trial_number: Trial index to embed in the suffix.

    Returns:
        ``"optuna_t"`` followed by the number zero-padded to at least four
        digits, e.g. ``"optuna_t0007"``.
    """
    trial_suffix = f"optuna_t{trial_number:04d}"
    return trial_suffix
def serialize_trial_parameter_dictionary(sampled_parameter_dictionary: dict[str, Any]) -> dict[str, Any]:
    """Convert one sampled parameter dictionary into plain serializable values.

    ``None`` and ``str`` / ``int`` / ``float`` / ``bool`` values are kept
    as they are. Every other value is round-tripped through JSON
    (``json.dumps`` then ``json.loads``), which for example turns tuples
    into lists.

    Args:
        sampled_parameter_dictionary: Mapping of dotted keys to sampled values.

    Returns:
        A new dictionary with the same keys in the same order.

    Raises:
        TypeError: If a non-scalar value cannot be encoded by ``json.dumps``.
    """
    scalar_types = (str, int, float, bool)
    return {
        dotted_key: (
            sampled_value
            if sampled_value is None or isinstance(sampled_value, scalar_types)
            else json.loads(json.dumps(sampled_value))
        )
        for dotted_key, sampled_value in sampled_parameter_dictionary.items()
    }