Coverage for sparkle/configurator/implementations/smac3.py: 84%
139 statements
« prev ^ index » next coverage.py v7.9.1, created at 2025-07-01 13:21 +0000
1"""Configurator classes to implement SMAC3 in Sparkle."""
2from __future__ import annotations
3from pathlib import Path
4import shutil
6from smac import version as smac_version
7from smac import Scenario as SmacScenario
8from smac import facade as smacfacades
9from smac.runhistory.enumerations import StatusType as SmacStatusType
10import numpy as np
12from runrunner import Runner, Run
14from sparkle.configurator.configurator import Configurator, ConfigurationScenario
15from sparkle.solver import Solver
16from sparkle.structures import FeatureDataFrame, PerformanceDataFrame
17from sparkle.instance import InstanceSet, Instance_Set
18from sparkle.types import SparkleObjective, resolve_objective, SolverStatus
class SMAC3(Configurator):
    """Class for SMAC3 (Python) configurator."""
    # Directory holding the SMAC3 wrapper resources, resolved relative to this file.
    configurator_path = Path(__file__).parent.resolve() / "SMAC3"
    # Python wrapper script used as the target algorithm entry point for SMAC3.
    configurator_target = configurator_path / "smac3_target_algorithm.py"

    full_name = "Sequential Model-based Algorithm Configuration"
    version = smac_version

    def __init__(self: SMAC3) -> None:
        """Initialise the SMAC3 configurator (Python SMAC, single objective only)."""
        super().__init__(multi_objective_support=False)

    @property
    def name(self: SMAC3) -> str:
        """Returns the name of the configurator."""
        return SMAC3.__name__

    @staticmethod
    def scenario_class() -> ConfigurationScenario:
        """Returns the SMAC3 scenario class."""
        return SMAC3Scenario

    @staticmethod
    def check_requirements(verbose: bool = False) -> bool:
        """Check that SMAC3 is installed."""
        return True  # Is automatically installed with Sparkle

    @staticmethod
    def download_requirements() -> None:
        """Download SMAC3."""
        return  # Nothing to do

    def configure(self: SMAC3,
                  scenario: SMAC3Scenario,
                  data_target: PerformanceDataFrame,
                  validate_after: bool = True,
                  sbatch_options: list[str] = None,
                  slurm_prepend: str | list[str] | Path = None,
                  num_parallel_jobs: int = None,
                  base_dir: Path = None,
                  run_on: Runner = Runner.SLURM) -> list[Run]:
        """Start configuration job.

        Args:
            scenario: ConfigurationScenario object
            data_target: PerformanceDataFrame where to store the found configurations
            validate_after: Whether the Validator will be called after the configuration
            sbatch_options: List of slurm batch options to use
            slurm_prepend: Slurm script to prepend to the sbatch
            num_parallel_jobs: The maximum number of jobs to run parallel.
            base_dir: The path where the sbatch scripts will be created for Slurm.
            run_on: On which platform to run the jobs. Default: Slurm.

        Returns:
            A RunRunner Run object.
        """
        # Avoid the shared-mutable-default pitfall: normalise None to a fresh list.
        sbatch_options = [] if sbatch_options is None else sbatch_options
        if (scenario.smac3_scenario.walltime_limit
                == scenario.smac3_scenario.cputime_limit == np.inf):
            print("WARNING: Starting SMAC3 scenario without any time limit.")
        scenario.create_scenario()
        configuration_ids = scenario.configuration_ids
        # TODO: Setting seeds like this is weird and should be inspected.
        # It could be good to take perhaps a seed from the scenario and use that
        # to generate a seed per run
        seeds = list(range(scenario.number_of_runs))
        num_parallel_jobs = num_parallel_jobs or scenario.number_of_runs
        # We do not require the configurator CLI as its already our own python wrapper
        cmds = [f"python3 {self.configurator_target.absolute()} "
                f"{scenario.scenario_file_path.absolute()} {configuration_id} {seed} "
                f"{data_target.csv_filepath}"
                for configuration_id, seed in zip(configuration_ids, seeds)]
        return super().configure(
            configuration_commands=cmds,
            data_target=data_target,
            output=None,
            scenario=scenario,
            configuration_ids=configuration_ids,
            validate_after=validate_after,
            sbatch_options=sbatch_options,
            slurm_prepend=slurm_prepend,
            num_parallel_jobs=num_parallel_jobs,
            base_dir=base_dir,
            run_on=run_on
        )

    @staticmethod
    def organise_output(output_source: Path,
                        output_target: Path,
                        scenario: SMAC3Scenario,
                        configuration_id: str) -> None | str:
        """Method to restructure and clean up after a single configurator call.

        Reads the SMAC3 run history JSON, aggregates each configuration's
        evaluations per the scenario objective, and saves the best configuration.

        Args:
            output_source: Path to the SMAC3 run history JSON file.
            output_target: Where the selected configuration should be stored.
            scenario: The scenario the configurator call belonged to.
            configuration_id: Identifier under which to save the configuration.

        Returns:
            The result of ``Configurator.save_configuration``, or None when the
            output source file is missing.
        """
        import json
        if not output_source.exists():
            print(f"SMAC3 ERROR: Output source file does not exist! [{output_source}]")
            return
        results_dict = json.load(output_source.open("r"))
        configurations = [value for _, value in results_dict["configs"].items()]
        # One evaluation list (scores over instances/seeds) per configuration
        config_evals = [[] for _ in range(len(configurations))]
        objective = scenario.sparkle_objective
        for entry in results_dict["data"]:
            smac_conf_id = entry["config_id"]
            score = entry["cost"]
            # SMAC3 configuration ids start at 1
            config_evals[smac_conf_id - 1].append(score)
        # Aggregate each configuration's scores into a single value per objective
        config_evals = [objective.instance_aggregator(evaluations)
                        for evaluations in config_evals]
        best_config = configurations[
            config_evals.index(objective.solver_aggregator(config_evals))]
        return Configurator.save_configuration(scenario, configuration_id,
                                               best_config, output_target)

    def get_status_from_logs(self: SMAC3) -> None:
        """Method to scan the log files of the configurator for warnings."""
        raise NotImplementedError

    @staticmethod
    def convert_status(status: SolverStatus) -> SmacStatusType:
        """Converts Sparkle Solver status to SMAC3 target status."""
        mapping = {
            SolverStatus.SUCCESS: SmacStatusType.SUCCESS,
            SolverStatus.CRASHED: SmacStatusType.CRASHED,
            SolverStatus.TIMEOUT: SmacStatusType.TIMEOUT,
            SolverStatus.WRONG: SmacStatusType.CRASHED,
            SolverStatus.UNKNOWN: SmacStatusType.CRASHED,
            SolverStatus.ERROR: SmacStatusType.CRASHED,
            SolverStatus.KILLED: SmacStatusType.TIMEOUT,
            SolverStatus.SAT: SmacStatusType.SUCCESS,
            SolverStatus.UNSAT: SmacStatusType.SUCCESS
        }
        return mapping[status]
class SMAC3Scenario(ConfigurationScenario):
    """Class to handle SMAC3 configuration scenarios."""

    def __init__(self: SMAC3Scenario,
                 solver: Solver,
                 instance_set: InstanceSet,
                 sparkle_objectives: list[SparkleObjective],
                 number_of_runs: int,
                 parent_directory: Path,
                 solver_cutoff_time: int = None,
                 smac_facade: smacfacades.AbstractFacade | str =
                 smacfacades.AlgorithmConfigurationFacade,
                 crash_cost: float | list[float] = np.inf,
                 termination_cost_threshold: float | list[float] = np.inf,
                 walltime_limit: float = np.inf,
                 cputime_limit: float = np.inf,
                 solver_calls: int = None,
                 use_default_config: bool = False,
                 feature_data: FeatureDataFrame | Path = None,
                 min_budget: float | int | None = None,
                 max_budget: float | int | None = None,
                 seed: int = -1,
                 n_workers: int = 1,
                 max_ratio: float = None,
                 smac3_output_directory: Path = Path(),
                 ) -> None:
        """Initialize scenario paths and names.

        Args:
            solver: Solver
                The solver to use for configuration.
            instance_set: InstanceSet
                The instance set to use for configuration.
            sparkle_objectives: list[SparkleObjective]
                The objectives to optimize.
            number_of_runs: int
                The number of times this scenario will be executed with different seeds.
            parent_directory: Path
                The parent directory where the configuration files will be stored.
            solver_cutoff_time: int
                Maximum CPU runtime in seconds that each solver call (trial)
                is allowed to run. Is managed by RunSolver, not pynisher.
            smac_facade: AbstractFacade, defaults to AlgorithmConfigurationFacade
                The SMAC facade to use for Optimisation.
            crash_cost: float | list[float], defaults to np.inf
                Defines the cost for a failed trial. In case of multi-objective,
                each objective can be associated with a different cost.
            termination_cost_threshold: float | list[float], defaults to np.inf
                Defines a cost threshold when the optimization should stop. In case of
                multi-objective, each objective *must* be associated with a cost.
                The optimization stops when all objectives crossed the threshold.
            walltime_limit: float, defaults to np.inf
                The maximum time in seconds that SMAC is allowed to run. Only counts
                solver time.
            cputime_limit: float, defaults to np.inf
                The maximum CPU time in seconds that SMAC is allowed to run. Only counts
                solver time.
            solver_calls: int, defaults to None
                The maximum number of trials (combination of configuration, seed, budget,
                and instance, depending on the task) to run. If left as None, will be
                calculated as int(cputime or walltime limit / cutoff time)
            use_default_config: bool, defaults to False
                If True, the configspace's default configuration is evaluated in the
                initial design. For historic benchmark reasons, this is False by default.
                Notice, that this will result in n_configs + 1 for the initial design.
                Respecting n_trials, this will result in one fewer evaluated
                configuration in the optimization.
            feature_data: FeatureDataFrame or Path, defaults to None
                Instances can be associated with features. For example, meta data of
                the dataset (mean, var, ...) can be incorporated which are then further
                used to expand the training data of the surrogate model. If Path, loaded
                from file. When no features are given, uses index as instance features.
            min_budget: float | int | None, defaults to None
                The minimum budget (epochs, subset size, number of instances, ...) that
                is used for the optimization. Use this argument if you use multi-fidelity
                or instance optimization.
            max_budget: float | int | None, defaults to None
                The maximum budget (epochs, subset size, number of instances, ...) that
                is used for the optimization. Use this argument if you use multi-fidelity
                or instance optimization.
            seed: int, defaults to -1
                The seed is used to make results reproducible.
                If seed is -1, SMAC will generate a random seed.
            n_workers: int, defaults to 1
                The number of workers to use for parallelization.
                If `n_workers` is greater than 1, SMAC will use DASK to parallelize the
                optimization.
            max_ratio: float, defaults to None.
                Facade uses at most scenario.n_trials * max_ratio number of
                configurations in the initial design. Additional configurations are not
                affected by this parameter. Not applicable to each facade.
            smac3_output_directory: Path, defaults to Path()
                The output subdirectory for the SMAC3 scenario. Defaults to the scenario
                results directory.
        """
        super().__init__(solver, instance_set, sparkle_objectives,
                         number_of_runs, parent_directory)
        # The files are saved in `./output_directory/name/seed`.
        self.log_dir = self.directory / "logs"
        self.feature_data = feature_data
        if isinstance(self.feature_data, Path):  # Load from file
            self.feature_data = FeatureDataFrame(self.feature_data)

        # Facade parameters; the facade may be given by name (e.g. when
        # deserialising from a scenario file) and is resolved to its class.
        self.smac_facade = smac_facade
        if isinstance(self.smac_facade, str):
            self.smac_facade = getattr(smacfacades, self.smac_facade)
        self.max_ratio = max_ratio

        if self.feature_data is not None:
            instance_features =\
                {instance: self.feature_data.get_instance(str(instance))
                 for instance in self.instance_set.instance_paths}
        else:
            # 'If no instance features are passed, the runhistory encoder can not
            # distinguish between different instances and therefore returns the same data
            # points with different values, all of which are used to train the surrogate
            # model. Consider using instance indices as features.'
            instance_features = {name: [index] for index, name
                                 in enumerate(instance_set.instance_paths)}

        # NOTE: Patchfix; SMAC3 can handle MO but Sparkle also gives non-user specified
        # objectives but not all class methods can handle it here yet
        self.sparkle_objective = sparkle_objectives[0]

        # NOTE: We don't use trial_walltime_limit as a way of managing resources
        # As it uses pynisher to do it (python based) and our targets are maybe not
        # RunSolver is the better option for accuracy.
        self.solver_cutoff_time = solver_cutoff_time
        if solver_calls is None:  # If solver calls is None, try to calculate it
            # Only derive the trial count from a *finite* time limit: the limits
            # default to np.inf and int(np.inf / cutoff) raises OverflowError.
            if self.solver_cutoff_time is not None\
                    and cputime_limit and np.isfinite(cputime_limit):
                solver_calls = int(cputime_limit / self.solver_cutoff_time)
            elif self.solver_cutoff_time is not None\
                    and walltime_limit and np.isfinite(walltime_limit):
                solver_calls = int(walltime_limit / self.solver_cutoff_time)
            else:
                solver_calls = 100  # SMAC3 Default value
        self.smac3_scenario = SmacScenario(
            configspace=solver.get_configuration_space(),
            name=self.name,
            output_directory=self.results_directory / smac3_output_directory,
            deterministic=solver.deterministic,
            objectives=[self.sparkle_objective.name],
            crash_cost=crash_cost,
            termination_cost_threshold=termination_cost_threshold,
            walltime_limit=walltime_limit,
            cputime_limit=cputime_limit,
            n_trials=solver_calls,
            use_default_config=use_default_config,
            instances=instance_set.instance_paths,
            instance_features=instance_features,
            min_budget=min_budget,
            max_budget=max_budget,
            seed=seed,
            n_workers=n_workers
        )

    def create_scenario(self: ConfigurationScenario) -> None:
        """Create scenario with solver and instances in the parent directory.

        This prepares all the necessary subdirectories related to configuration.
        Any pre-existing scenario directory is removed first.
        """
        shutil.rmtree(self.directory, ignore_errors=True)
        self.directory.mkdir(parents=True)
        # Create empty directories as needed
        self.results_directory.mkdir(parents=True)  # Prepare results directory
        self.log_dir.mkdir(parents=True)
        self.validation.mkdir(parents=True, exist_ok=True)
        self.create_scenario_file()

    @property
    def configurator(self: SMAC3Scenario) -> SMAC3:
        """Return the type of configurator the scenario belongs to."""
        return SMAC3

    def create_scenario_file(self: SMAC3Scenario) -> Path:
        """Create a file with the configuration scenario.

        Returns:
            Path to the created scenario file.
        """
        super().create_scenario_file()
        with self.scenario_file_path.open("w") as file:
            for key, value in self.serialise().items():
                file.write(f"{key} = {value}\n")
        return self.scenario_file_path

    def serialise(self: SMAC3Scenario) -> dict:
        """Serialize the configuration scenario."""
        # Explicit None check: FeatureDataFrame truthiness may be ambiguous
        feature_data =\
            self.feature_data.csv_filepath if self.feature_data is not None else None
        return {
            "solver": self.solver.directory,
            "instance_set": self.instance_set.directory,
            "sparkle_objectives": ",".join(self.smac3_scenario.objectives),
            "solver_cutoff_time": self.solver_cutoff_time,
            "number_of_runs": self.number_of_runs,
            "smac_facade": self.smac_facade.__name__,
            "crash_cost": self.smac3_scenario.crash_cost,
            "termination_cost_threshold": self.smac3_scenario.termination_cost_threshold,
            "walltime_limit": self.smac3_scenario.walltime_limit,
            "cputime_limit": self.smac3_scenario.cputime_limit,
            "solver_calls": self.smac3_scenario.n_trials,
            "use_default_config": self.smac3_scenario.use_default_config,
            "feature_data": feature_data,
            "min_budget": self.smac3_scenario.min_budget,
            "max_budget": self.smac3_scenario.max_budget,
            "seed": self.smac3_scenario.seed,
            "n_workers": self.smac3_scenario.n_workers,
        }

    @staticmethod
    def from_file(scenario_file: Path,
                  run_index: int = None) -> SMAC3Scenario:
        """Reads scenario file and initalises ConfigurationScenario.

        Args:
            scenario_file: Path to scenario file.
            run_index: If given, reads as the scenario with run_index for offset
                in output directory and seed.

        Returns:
            ConfigurationScenario.
        """
        import ast
        # read_text avoids leaving a file handle open
        variables = {keyvalue[0]: keyvalue[1].strip()
                     for keyvalue in (line.split(" = ", maxsplit=1)
                                      for line in scenario_file.read_text().splitlines()
                                      if line.strip() != "")}
        variables["solver"] = Solver(Path(variables["solver"]))
        variables["instance_set"] = Instance_Set(Path(variables["instance_set"]))
        variables["sparkle_objectives"] = [
            resolve_objective(o)
            for o in variables["sparkle_objectives"].split(",")]
        variables["parent_directory"] = scenario_file.parent.parent
        variables["solver_cutoff_time"] = int(variables["solver_cutoff_time"])
        variables["number_of_runs"] = int(variables["number_of_runs"])
        variables["smac_facade"] = getattr(smacfacades, variables["smac_facade"])

        # We need to support both lists of floats and single float (np.inf is fine)
        if variables["crash_cost"].startswith("["):
            variables["crash_cost"] =\
                [float(v) for v in ast.literal_eval(variables["crash_cost"])]
        else:
            variables["crash_cost"] = float(variables["crash_cost"])
        if variables["termination_cost_threshold"].startswith("["):
            variables["termination_cost_threshold"] =\
                [float(v) for v in ast.literal_eval(
                    variables["termination_cost_threshold"])]
        else:
            variables["termination_cost_threshold"] =\
                float(variables["termination_cost_threshold"])

        variables["walltime_limit"] = float(variables["walltime_limit"])
        variables["cputime_limit"] = float(variables["cputime_limit"])
        variables["solver_calls"] = ast.literal_eval(variables["solver_calls"])
        variables["use_default_config"] =\
            ast.literal_eval(variables["use_default_config"])

        if variables["feature_data"] != "None":
            variables["feature_data"] = Path(variables["feature_data"])
        else:
            variables["feature_data"] = None

        variables["min_budget"] = ast.literal_eval(variables["min_budget"])
        variables["max_budget"] = ast.literal_eval(variables["max_budget"])
        variables["seed"] = ast.literal_eval(variables["seed"])
        variables["n_workers"] = ast.literal_eval(variables["n_workers"])
        if run_index is not None:  # Offset seed and output dir per run
            variables["seed"] += run_index
            variables["smac3_output_directory"] = Path(f"run_{run_index}")

        return SMAC3Scenario(**variables)