Coverage for sparkle/configurator/implementations/smac3.py: 86%
159 statements
1"""Configurator classes to implement SMAC3 in Sparkle."""
3from __future__ import annotations
4from pathlib import Path
6from smac import version as smac_version
7from smac import Scenario as SmacScenario
8from smac import facade as smacfacades
9from smac.runhistory.enumerations import StatusType as SmacStatusType
10import numpy as np
11import random
12from typing import Optional
14from runrunner import Runner, Run
16from sparkle.configurator.configurator import Configurator, ConfigurationScenario
17from sparkle.solver import Solver
18from sparkle.structures import FeatureDataFrame, PerformanceDataFrame
19from sparkle.instance import InstanceSet, Instance_Set
20from sparkle.types import SparkleObjective, resolve_objective, SolverStatus


class SMAC3(Configurator):
    """Class for SMAC3 (Python) configurator."""

    configurator_path = Path(__file__).parent.resolve() / "SMAC3"
    configurator_target = configurator_path / "smac3_target_algorithm.py"

    full_name = "Sequential Model-based Algorithm Configuration"
    version = smac_version

    def __init__(self: SMAC3) -> None:
        """Initialise the SMAC3 configurator (Python SMAC v2.3.1)."""
        super().__init__(multi_objective_support=False)

    @property
    def name(self: SMAC3) -> str:
        """Returns the name of the configurator."""
        return SMAC3.__name__

    @staticmethod
    def scenario_class() -> ConfigurationScenario:
        """Returns the SMAC3 scenario class."""
        return SMAC3Scenario

    @staticmethod
    def check_requirements(verbose: bool = False) -> bool:
        """Check that SMAC3 is installed."""
        return True  # Is automatically installed with Sparkle

    @staticmethod
    def download_requirements() -> None:
        """Download SMAC3."""
        return  # Nothing to do

    def configure(
        self: SMAC3,
        scenario: SMAC3Scenario,
        data_target: PerformanceDataFrame,
        validate_after: bool = True,
        sbatch_options: list[str] = [],
        slurm_prepend: str | list[str] | Path = None,
        num_parallel_jobs: int = None,
        base_dir: Path = None,
        run_on: Runner = Runner.SLURM,
    ) -> list[Run]:
        """Start configuration job.

        Args:
            scenario: ConfigurationScenario object
            data_target: PerformanceDataFrame where to store the found configurations
            validate_after: Whether the Validator will be called after the configuration
            sbatch_options: List of Slurm batch options to use
            slurm_prepend: Slurm script to prepend to the sbatch
            num_parallel_jobs: The maximum number of jobs to run in parallel.
            base_dir: The path where the sbatch scripts will be created for Slurm.
            run_on: On which platform to run the jobs. Default: Slurm.

        Returns:
            A list of RunRunner Run objects.
        """
        scenario.create_scenario()
        if (
            scenario.smac3_scenario.walltime_limit
            == scenario.smac3_scenario.cputime_limit
            == np.inf
        ):
            print("WARNING: Starting SMAC3 scenario without any time limit.")
        configuration_ids = scenario.configuration_ids
        # The scenario file also has a seed, but not for all types of configurators
        seeds = [random.randint(0, 2**32 - 1) for _ in range(scenario.number_of_runs)]
        num_parallel_jobs = num_parallel_jobs or scenario.number_of_runs
        # We do not require the configurator CLI, as it is already our own Python wrapper
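        # For illustration, each generated command below has the shape:
        #   python3 .../SMAC3/smac3_target_algorithm.py <scenario_file> \
        #       <configuration_id> <seed> <performance_data_csv>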
        cmds = [
            f"python3 {self.configurator_target.absolute()} "
            f"{scenario.scenario_file_path.absolute()} {configuration_id} {seed} "
            f"{data_target.csv_filepath}"
            for configuration_id, seed in zip(configuration_ids, seeds)
        ]
        return super().configure(
            configuration_commands=cmds,
            data_target=data_target,
            output=None,
            scenario=scenario,
            configuration_ids=configuration_ids,
            validate_after=validate_after,
            sbatch_options=sbatch_options,
            slurm_prepend=slurm_prepend,
            num_parallel_jobs=num_parallel_jobs,
            base_dir=base_dir,
            run_on=run_on,
        )

    @staticmethod
    def organise_output(
        output_source: Path,
        output_target: Path,
        scenario: SMAC3Scenario,
        configuration_id: str,
    ) -> None | str:
        """Method to restructure and clean up after a single configurator call."""
        import json

        if not output_source.exists():
            print(f"SMAC3 ERROR: Output source file does not exist! [{output_source}]")
            return
        results_dict = json.loads(output_source.read_text())
        configurations = [value for _, value in results_dict["configs"].items()]
        config_evals = [[] for _ in range(len(configurations))]
        objective = scenario.sparkle_objective
        for entry in results_dict["data"]:
            smac_conf_id = entry["config_id"]
            score = entry["cost"]
            # SMAC3 configuration ids start at 1
            config_evals[smac_conf_id - 1].append(score)
        config_evals = [
            objective.instance_aggregator(evaluations) for evaluations in config_evals
        ]
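        # instance_aggregator reduces each configuration's per-instance costs to a
        # single value; solver_aggregator then selects the best aggregated value
        # across configurations (e.g. the minimum for runtime-like objectives).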
        best_config = configurations[
            config_evals.index(objective.solver_aggregator(config_evals))
        ]
        return Configurator.save_configuration(
            scenario, configuration_id, best_config, output_target
        )

    def get_status_from_logs(self: SMAC3) -> None:
        """Method to scan the log files of the configurator for warnings."""
        raise NotImplementedError

    @staticmethod
    def convert_status(status: SolverStatus) -> SmacStatusType:
        """Converts Sparkle Solver status to SMAC3 target status."""
        mapping = {
            SolverStatus.SUCCESS: SmacStatusType.SUCCESS,
            SolverStatus.CRASHED: SmacStatusType.CRASHED,
            SolverStatus.TIMEOUT: SmacStatusType.TIMEOUT,
            SolverStatus.WRONG: SmacStatusType.CRASHED,
            SolverStatus.UNKNOWN: SmacStatusType.CRASHED,
            SolverStatus.ERROR: SmacStatusType.CRASHED,
            SolverStatus.KILLED: SmacStatusType.TIMEOUT,
            SolverStatus.SAT: SmacStatusType.SUCCESS,
            SolverStatus.UNSAT: SmacStatusType.SUCCESS,
        }
        return mapping[status]


class SMAC3Scenario(ConfigurationScenario):
    """Class to handle SMAC3 configuration scenarios."""

    def __init__(
        self: SMAC3Scenario,
        solver: Solver,
        instance_set: InstanceSet,
        sparkle_objectives: list[SparkleObjective],
        number_of_runs: int,
        parent_directory: Path,
        solver_cutoff_time: int = None,
        smac_facade: smacfacades.AbstractFacade
        | str = smacfacades.AlgorithmConfigurationFacade,
        crash_cost: float | list[float] = np.inf,
        termination_cost_threshold: float | list[float] = np.inf,
        walltime_limit: float = np.inf,
        cputime_limit: float = np.inf,
        solver_calls: int = None,
        use_default_config: bool = False,
        feature_data: FeatureDataFrame | Path = None,
        min_budget: float | int | None = None,
        max_budget: float | int | None = None,
        seed: int = -1,
        n_workers: int = 1,
        max_ratio: float = None,
        smac3_output_directory: Path = Path(),
        timestamp: str = None,
    ) -> None:
196 """Initialize scenario paths and names.
198 Args:
199 solver: Solver
200 The solver to use for configuration.
201 instance_set: InstanceSet
202 The instance set to use for configuration.
203 sparkle_objectives: list[SparkleObjective]
204 The objectives to optimize.
205 number_of_runs: int
206 The number of times this scenario will be executed with different seeds.
207 parent_directory: Path
208 The parent directory where the configuration files will be stored.
209 solver_cutoff_time: int
210 Maximum CPU runtime in seconds that each solver call (trial)
211 is allowed to run. Is managed by RunSolver, not pynisher.
212 smac_facade: AbstractFacade, defaults to AlgorithmConfigurationFacade
213 The SMAC facade to use for Optimisation.
214 crash_cost: float | list[float], defaults to np.inf
215 Defines the cost for a failed trial. In case of multi-objective,
216 each objective can be associated with a different cost.
217 termination_cost_threshold: float | list[float], defaults to np.inf
218 Defines a cost threshold when the optimization should stop. In case of
219 multi-objective, each objective *must* be associated with a cost.
220 The optimization stops when all objectives crossed the threshold.
221 walltime_limit: float, defaults to np.inf
222 The maximum time in seconds that SMAC is allowed to run. Only counts
223 solver time.
224 cputime_limit: float, defaults to np.inf
225 The maximum CPU time in seconds that SMAC is allowed to run. Only counts
226 solver time.
227 solver_calls: int, defaults to None
228 The maximum number of trials (combination of configuration, seed, budget,
229 and instance, depending on the task) to run. If left as None, will be
230 calculated as int(cutoff time / cputime or walltime limit)
231 use_default_config: bool, defaults to False
232 If True, the configspace's default configuration is evaluated in the
233 initial design. For historic benchmark reasons, this is False by default.
234 Notice, that this will result in n_configs + 1 for the initial design.
235 Respecting n_trials, this will result in one fewer evaluated
236 configuration in the optimization.
237 instances: list[str] | None, defaults to None
238 Names of the instances to use. If None, no instances are used. Instances
239 could be dataset names, seeds, subsets, etc.
240 feature_data: FeatureDataFrame or Path, defaults to None
241 Instances can be associated with features. For example, meta data of
242 the dataset (mean, var, ...) can be incorporated which are then further
243 used to expand the training data of the surrogate model. If Path, loaded
244 from file. When no features are given, uses index as instance features.
245 min_budget: float | int | None, defaults to None
246 The minimum budget (epochs, subset size, number of instances, ...) that
247 is used for the optimization. Use this argument if you use multi-fidelity
248 or instance optimization.
249 max_budget: float | int | None, defaults to None
250 The maximum budget (epochs, subset size, number of instances, ...) that
251 is used for the optimization. Use this argument if you use multi-fidelity
252 or instance optimization.
253 seed: int, defaults to -1
254 The seed is used to make results reproducible.
255 If seed is -1, SMAC will generate a random seed.
256 n_workers: int, defaults to 1
257 The number of workers to use for parallelization.
258 If `n_workers` is greather than 1, SMAC will use DASK to parallelize the
259 optimization.
260 max_ratio: float, defaults to None.
261 Facade uses at most scenario.n_trials * max_ratio number of
262 configurations in the initial design. Additional configurations are not
263 affected by this parameter. Not applicable to each facade.
264 smac3_output_directory: Path, defaults to Path()
265 The output subdirectory for the SMAC3 scenario. Defaults to the scenario
266 results directory.
267 timestamp: An optional timestamp for the directory name.
268 """
        super().__init__(
            solver,
            instance_set,
            sparkle_objectives,
            number_of_runs,
            parent_directory,
            timestamp,
        )
        self.feature_data = feature_data
        if isinstance(self.feature_data, Path):  # Load from file
            self.feature_data = FeatureDataFrame(self.feature_data)

        # Facade parameters
        self.smac_facade = smac_facade
        if isinstance(self.smac_facade, str):
            self.smac_facade = getattr(smacfacades, self.smac_facade)
        self.max_ratio = max_ratio

        if self.feature_data is not None:
            instance_features = {
                instance: self.feature_data.get_instance(str(instance))
                for instance in self.instance_set.instance_paths
            }
        else:
            # 'If no instance features are passed, the runhistory encoder can not
            # distinguish between different instances and therefore returns the same
            # data points with different values, all of which are used to train the
            # surrogate model. Consider using instance indices as features.'
            instance_features = {
                name: [index] for index, name in enumerate(instance_set.instance_paths)
            }

        # NOTE: Patch fix; SMAC3 can handle multi-objective optimisation, but Sparkle
        # also passes objectives the user did not specify, and not all class methods
        # here can handle that yet.
        self.sparkle_objective = sparkle_objectives[0]

        # NOTE: We do not use trial_walltime_limit to manage resources, as it relies
        # on pynisher (Python based) while our targets may not be Python;
        # RunSolver is the more accurate option.
        self.solver_cutoff_time = solver_cutoff_time
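        # If no trial budget is given, derive one from the time budget below, e.g.
        # (hypothetical numbers) a cputime_limit of 3600 s with a 60 s cutoff yields
        # int(3600 / 60) = 60 solver calls.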
        if solver_calls is None:  # If solver calls is None, try to calculate it
            if self.solver_cutoff_time is not None and (
                np.isfinite(cputime_limit) or np.isfinite(walltime_limit)
            ):
                if np.isfinite(cputime_limit):
                    solver_calls = int(cputime_limit / self.solver_cutoff_time)
                else:
                    solver_calls = int(walltime_limit / self.solver_cutoff_time)
            else:
                solver_calls = 100  # SMAC3 default value
        self.smac3_output_directory = smac3_output_directory
        self.crash_cost = crash_cost
        self.termination_cost_threshold = termination_cost_threshold
        self.walltime_limit = walltime_limit
        self.cputime_limit = cputime_limit
        self.solver_calls = solver_calls
        self.use_default_config = use_default_config
        self.instance_features = instance_features
        self.min_budget = min_budget
        self.max_budget = max_budget
        self.seed = seed
        self.n_workers = n_workers
        self.smac3_scenario: Optional[SmacScenario] = None

    def create_scenario(self: SMAC3Scenario) -> None:
        """This prepares all the necessary subdirectories related to configuration."""
        super().create_scenario()
        self.log_dir.mkdir(parents=True)
        if self.smac3_scenario is None:
            self.set_smac3_scenario()
        self.create_scenario_file()

    def set_smac3_scenario(self: SMAC3Scenario) -> None:
        """Set the smac scenario object."""
        self.smac3_scenario = SmacScenario(
            configspace=self.solver.get_configuration_space(),
            name=self.name,
            output_directory=self.results_directory / self.smac3_output_directory,
            deterministic=self.solver.deterministic,
            objectives=[self.sparkle_objective.name],
            crash_cost=self.crash_cost,
            termination_cost_threshold=self.termination_cost_threshold,
            walltime_limit=self.walltime_limit,
            cputime_limit=self.cputime_limit,
            n_trials=self.solver_calls,
            use_default_config=self.use_default_config,
            instances=self.instance_set.instance_paths,
            instance_features=self.instance_features,
            min_budget=self.min_budget,
            max_budget=self.max_budget,
            seed=self.seed,
            n_workers=self.n_workers,
        )

    @property
    def log_dir(self: SMAC3Scenario) -> Path:
        """Return the path of the log directory."""
        if self.directory:
            return self.directory / "logs"
        return None

    @property
    def configurator(self: SMAC3Scenario) -> SMAC3:
        """Return the type of configurator the scenario belongs to."""
        return SMAC3

    def create_scenario_file(self: SMAC3Scenario) -> Path:
        """Create a file with the configuration scenario."""
        with self.scenario_file_path.open("w") as file:
            for key, value in self.serialise().items():
                file.write(f"{key} = {value}\n")
        return self.scenario_file_path

    def serialise(self: SMAC3Scenario) -> dict:
        """Serialise the configuration scenario."""
        feature_data = (
            str(self.feature_data.csv_filepath) if self.feature_data is not None else None
        )
        return {
            "solver": self.solver.directory,
            "instance_set": self.instance_set.directory,
            "sparkle_objectives": ",".join(self.smac3_scenario.objectives),
            "solver_cutoff_time": self.solver_cutoff_time,
            "number_of_runs": self.number_of_runs,
            "smac_facade": self.smac_facade.__name__,
            "crash_cost": self.smac3_scenario.crash_cost,
            "termination_cost_threshold": self.smac3_scenario.termination_cost_threshold,
            "walltime_limit": self.smac3_scenario.walltime_limit,
            "cputime_limit": self.smac3_scenario.cputime_limit,
            "solver_calls": self.smac3_scenario.n_trials,
            "use_default_config": self.smac3_scenario.use_default_config,
            "feature_data": feature_data,
            "min_budget": self.smac3_scenario.min_budget,
            "max_budget": self.smac3_scenario.max_budget,
            "seed": self.smac3_scenario.seed,
            "n_workers": self.smac3_scenario.n_workers,
        }
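
    # The serialised scenario file is a plain-text "key = value" listing, for example
    # (values are illustrative, not taken from an actual run):
    #   solver = Solvers/MySolver
    #   instance_set = Instances/MyInstances
    #   sparkle_objectives = PAR10
    #   solver_cutoff_time = 60
    #   ...
    # from_file() below parses this format back into an SMAC3Scenario.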

    @staticmethod
    def from_file(scenario_file: Path, run_index: int = None) -> SMAC3Scenario:
        """Reads a scenario file and initialises a ConfigurationScenario.

        Args:
            scenario_file: Path to the scenario file.
            run_index: If given, reads as the scenario with run_index for offset
                in output directory and seed.

        Returns:
            ConfigurationScenario.
        """
        import ast

        variables = {
            keyvalue[0]: keyvalue[1].strip()
            for keyvalue in (
                line.split(" = ", maxsplit=1)
                for line in scenario_file.open().readlines()
                if line.strip() != ""
            )
        }
424 variables["solver"] = Solver(Path(variables["solver"]))
425 variables["instance_set"] = Instance_Set(Path(variables["instance_set"]))
426 variables["sparkle_objectives"] = [
427 resolve_objective(o) for o in variables["sparkle_objectives"].split(",")
428 ]
429 variables["parent_directory"] = scenario_file.parent.parent
430 variables["solver_cutoff_time"] = int(variables["solver_cutoff_time"])
431 variables["number_of_runs"] = int(variables["number_of_runs"])
432 variables["smac_facade"] = getattr(smacfacades, variables["smac_facade"])

        # We need to support both lists of floats and a single float (np.inf is fine)
        if variables["crash_cost"].startswith("["):
            variables["crash_cost"] = [
                float(v) for v in ast.literal_eval(variables["crash_cost"])
            ]
        else:
            variables["crash_cost"] = float(variables["crash_cost"])
        if variables["termination_cost_threshold"].startswith("["):
            variables["termination_cost_threshold"] = [
                float(v)
                for v in ast.literal_eval(variables["termination_cost_threshold"])
            ]
        else:
            variables["termination_cost_threshold"] = float(
                variables["termination_cost_threshold"]
            )

        variables["walltime_limit"] = float(variables["walltime_limit"])
        variables["cputime_limit"] = float(variables["cputime_limit"])
        variables["solver_calls"] = ast.literal_eval(variables["solver_calls"])
        variables["use_default_config"] = ast.literal_eval(
            variables["use_default_config"]
        )
458 if variables["feature_data"] != "None":
459 variables["feature_data"] = Path(variables["feature_data"])
460 else:
461 variables["feature_data"] = None
463 variables["min_budget"] = ast.literal_eval(variables["min_budget"])
464 variables["max_budget"] = ast.literal_eval(variables["max_budget"])
466 variables["seed"] = ast.literal_eval(variables["seed"])
467 variables["n_workers"] = ast.literal_eval(variables["n_workers"])
        if run_index is not None:  # Offset
            variables["seed"] += run_index
            variables["smac3_output_directory"] = Path(f"run_{run_index}")

        timestamp = scenario_file.parent.name.split("_")[-1]
        scenario = SMAC3Scenario(**variables, timestamp=timestamp)
        scenario.set_smac3_scenario()
        return scenario
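

# A minimal usage sketch (illustrative only; the solver directory, instance set
# directory, objective name and performance_data_frame below are hypothetical
# placeholders, not defined in this module):
#
#   solver = Solver(Path("Solvers/MySolver"))
#   instance_set = Instance_Set(Path("Instances/MyInstances"))
#   scenario = SMAC3Scenario(
#       solver,
#       instance_set,
#       sparkle_objectives=[resolve_objective("PAR10")],
#       number_of_runs=5,
#       parent_directory=Path("Output/Configuration"),
#       solver_cutoff_time=60,
#       cputime_limit=3600,
#   )
#   runs = SMAC3().configure(scenario, data_target=performance_data_frame)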