Coverage for src/sparkle/configurator/implementations/smac3.py: 86%
160 statements
coverage.py v7.13.1, created at 2026-01-21 15:31 +0000
1"""Configurator classes to implement SMAC3 in Sparkle."""
3from __future__ import annotations
4from pathlib import Path
6from smac import version as smac_version
7from smac import Scenario as SmacScenario
8from smac import facade as smacfacades
9from smac.runhistory.enumerations import StatusType as SmacStatusType
10import numpy as np
11import random
12from typing import Optional
14from runrunner import Runner, Run
16from sparkle.configurator.configurator import Configurator, ConfigurationScenario
17from sparkle.solver import Solver
18from sparkle.structures import FeatureDataFrame, PerformanceDataFrame
19from sparkle.instance import InstanceSet, Instance_Set
20from sparkle.types import SparkleObjective, resolve_objective, SolverStatus
23class SMAC3(Configurator):
24 """Class for SMAC3 (Python) configurator."""
26 configurator_path = Path(__file__).parent.resolve() / "SMAC3"
27 configurator_target = configurator_path / "smac3_target_algorithm.py"
29 full_name = "Sequential Model-based Algorithm Configuration"
30 version = smac_version

    def __init__(self: SMAC3) -> None:
        """Initialise the SMAC3 configurator (Python SMAC v2.3.1)."""
        return super().__init__(multi_objective_support=False)

    @property
    def name(self: SMAC3) -> str:
        """Returns the name of the configurator."""
        return SMAC3.__name__

    @staticmethod
    def scenario_class() -> ConfigurationScenario:
        """Returns the SMAC3 scenario class."""
        return SMAC3Scenario

    @staticmethod
    def check_requirements(verbose: bool = False) -> bool:
        """Check that SMAC3 is installed."""
        return True  # Is automatically installed with Sparkle

    @staticmethod
    def download_requirements() -> None:
        """Download SMAC3."""
        return  # Nothing to do

    def configure(
        self: SMAC3,
        scenario: SMAC3Scenario,
        data_target: PerformanceDataFrame,
        validate_after: bool = True,
        sbatch_options: list[str] = [],
        slurm_prepend: str | list[str] | Path = None,
        num_parallel_jobs: int = None,
        base_dir: Path = None,
        run_on: Runner = Runner.SLURM,
    ) -> list[Run]:
        """Start configuration job.

        Args:
            scenario: ConfigurationScenario object
            data_target: PerformanceDataFrame where to store the found configurations
            validate_after: Whether the Validator will be called after the configuration
            sbatch_options: List of Slurm batch options to use
            slurm_prepend: Slurm script to prepend to the sbatch
            num_parallel_jobs: The maximum number of jobs to run in parallel.
            base_dir: The path where the sbatch scripts will be created for Slurm.
            run_on: On which platform to run the jobs. Default: Slurm.

        Returns:
            A list of RunRunner Run objects.
        """
        scenario.create_scenario()
        if (
            scenario.smac3_scenario.walltime_limit
            == scenario.smac3_scenario.cputime_limit
            == np.inf
        ):
            print("WARNING: Starting SMAC3 scenario without any time limit.")
        configuration_ids = scenario.configuration_ids
        # The scenario file also has a seed, but not for all types of configurators
        seeds = [random.randint(0, 2**32 - 1) for _ in range(scenario.number_of_runs)]
        num_parallel_jobs = num_parallel_jobs or scenario.number_of_runs
        # We do not require the configurator CLI, as it is already our own Python wrapper
        cmds = [
            f"python3 {self.configurator_target.absolute()} "
            f"{scenario.scenario_file_path.absolute()} {configuration_id} {seed} "
            f"{data_target.csv_filepath}"
            for configuration_id, seed in zip(configuration_ids, seeds)
        ]
        return super().configure(
            configuration_commands=cmds,
            data_target=data_target,
            output=None,
            scenario=scenario,
            configuration_ids=configuration_ids,
            validate_after=validate_after,
            sbatch_options=sbatch_options,
            slurm_prepend=slurm_prepend,
            num_parallel_jobs=num_parallel_jobs,
            base_dir=base_dir,
            run_on=run_on,
        )
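
    # Illustrative usage sketch (not part of the module): launching a configuration
    # run locally. `my_solver`, `train_set` and `performance_df` are hypothetical
    # objects assumed to be built elsewhere via the Sparkle API, and "PAR10" is an
    # example objective name.
    #
    #   scenario = SMAC3Scenario(
    #       solver=my_solver,
    #       instance_set=train_set,
    #       sparkle_objectives=[resolve_objective("PAR10")],
    #       number_of_runs=5,
    #       parent_directory=Path("Output/Configuration"),
    #       solver_cutoff_time=60,
    #       walltime_limit=3600,
    #   )
    #   runs = SMAC3().configure(scenario, data_target=performance_df,
    #                            run_on=Runner.LOCAL)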

    @staticmethod
    def organise_output(
        output_source: Path,
        output_target: Path,
        scenario: SMAC3Scenario,
        configuration_id: str,
    ) -> None | str:
        """Method to restructure and clean up after a single configurator call."""
        import json

        if not output_source.exists():
            print(f"SMAC3 ERROR: Output source file does not exist! [{output_source}]")
            return
        results_dict = json.load(output_source.open("r"))
        configurations = [value for _, value in results_dict["configs"].items()]
        config_evals = [[] for _ in range(len(configurations))]
        objective = scenario.sparkle_objective
        for entry in results_dict["data"]:
            smac_conf_id = entry["config_id"]
            score = entry["cost"]
            # SMAC3 configuration ids start at 1
            config_evals[smac_conf_id - 1].append(score)
        config_evals = [
            objective.instance_aggregator(evaluations) for evaluations in config_evals
        ]
        best_config = configurations[
            config_evals.index(objective.solver_aggregator(config_evals))
        ]
        best_config["configuration_id"] = configuration_id
        return Configurator.save_configuration(
            scenario, configuration_id, best_config, output_target
        )
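
    # For reference, a minimal sketch of the run history JSON parsed above, reduced
    # to the keys organise_output reads ("configs", "data", "config_id", "cost");
    # the real SMAC3 output contains additional fields and "param_a" is hypothetical.
    #
    #   {
    #       "configs": {"1": {"param_a": 0.5}, "2": {"param_a": 0.9}},
    #       "data": [
    #           {"config_id": 1, "cost": 12.3},
    #           {"config_id": 2, "cost": 4.2}
    #       ]
    #   }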

    def get_status_from_logs(self: SMAC3) -> None:
        """Method to scan the log files of the configurator for warnings."""
        raise NotImplementedError

    @staticmethod
    def convert_status(status: SolverStatus) -> SmacStatusType:
        """Converts Sparkle Solver status to SMAC3 target status."""
        mapping = {
            SolverStatus.SUCCESS: SmacStatusType.SUCCESS,
            SolverStatus.CRASHED: SmacStatusType.CRASHED,
            SolverStatus.TIMEOUT: SmacStatusType.TIMEOUT,
            SolverStatus.WRONG: SmacStatusType.CRASHED,
            SolverStatus.UNKNOWN: SmacStatusType.CRASHED,
            SolverStatus.ERROR: SmacStatusType.CRASHED,
            SolverStatus.KILLED: SmacStatusType.TIMEOUT,
            SolverStatus.SAT: SmacStatusType.SUCCESS,
            SolverStatus.UNSAT: SmacStatusType.SUCCESS,
        }
        return mapping[status]
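
    # Illustrative examples of the mapping above: SAT and UNSAT answers count as
    # successful SMAC3 trials, while a killed solver is reported as a timeout.
    #
    #   SMAC3.convert_status(SolverStatus.SAT)     # -> SmacStatusType.SUCCESS
    #   SMAC3.convert_status(SolverStatus.KILLED)  # -> SmacStatusType.TIMEOUT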


class SMAC3Scenario(ConfigurationScenario):
    """Class to handle SMAC3 configuration scenarios."""

    def __init__(
        self: SMAC3Scenario,
        solver: Solver,
        instance_set: InstanceSet,
        sparkle_objectives: list[SparkleObjective],
        number_of_runs: int,
        parent_directory: Path,
        solver_cutoff_time: int = None,
        smac_facade: smacfacades.AbstractFacade
        | str = smacfacades.AlgorithmConfigurationFacade,
        crash_cost: float | list[float] = np.inf,
        termination_cost_threshold: float | list[float] = np.inf,
        walltime_limit: float = np.inf,
        cputime_limit: float = np.inf,
        solver_calls: int = None,
        use_default_config: bool = False,
        feature_data: FeatureDataFrame | Path = None,
        min_budget: float | int | None = None,
        max_budget: float | int | None = None,
        seed: int = -1,
        n_workers: int = 1,
        max_ratio: float = None,
        smac3_output_directory: Path = Path(),
        timestamp: str = None,
    ) -> None:
        """Initialize scenario paths and names.

        Args:
            solver: Solver
                The solver to use for configuration.
            instance_set: InstanceSet
                The instance set to use for configuration.
            sparkle_objectives: list[SparkleObjective]
                The objectives to optimize.
            number_of_runs: int
                The number of times this scenario will be executed with different seeds.
            parent_directory: Path
                The parent directory where the configuration files will be stored.
            solver_cutoff_time: int
                Maximum CPU runtime in seconds that each solver call (trial)
                is allowed to run. This is managed by RunSolver, not pynisher.
            smac_facade: AbstractFacade, defaults to AlgorithmConfigurationFacade
                The SMAC facade to use for optimisation.
            crash_cost: float | list[float], defaults to np.inf
                Defines the cost for a failed trial. In case of multi-objective,
                each objective can be associated with a different cost.
            termination_cost_threshold: float | list[float], defaults to np.inf
                Defines a cost threshold when the optimization should stop. In case of
                multi-objective, each objective *must* be associated with a cost.
                The optimization stops when all objectives crossed the threshold.
            walltime_limit: float, defaults to np.inf
                The maximum time in seconds that SMAC is allowed to run. Only counts
                solver time.
            cputime_limit: float, defaults to np.inf
                The maximum CPU time in seconds that SMAC is allowed to run. Only counts
                solver time.
            solver_calls: int, defaults to None
                The maximum number of trials (combination of configuration, seed, budget,
                and instance, depending on the task) to run. If left as None, it is
                calculated as int(cputime or walltime limit / solver cutoff time).
            use_default_config: bool, defaults to False
                If True, the configspace's default configuration is evaluated in the
                initial design. For historic benchmark reasons, this is False by default.
                Notice that this will result in n_configs + 1 for the initial design.
                Respecting n_trials, this will result in one fewer evaluated
                configuration in the optimization.
            feature_data: FeatureDataFrame or Path, defaults to None
                Instances can be associated with features. For example, meta data of
                the dataset (mean, var, ...) can be incorporated, which is then used
                to expand the training data of the surrogate model. If a Path is given,
                the features are loaded from file. When no features are given, the
                instance index is used as the instance feature.
            min_budget: float | int | None, defaults to None
                The minimum budget (epochs, subset size, number of instances, ...) that
                is used for the optimization. Use this argument if you use multi-fidelity
                or instance optimization.
            max_budget: float | int | None, defaults to None
                The maximum budget (epochs, subset size, number of instances, ...) that
                is used for the optimization. Use this argument if you use multi-fidelity
                or instance optimization.
            seed: int, defaults to -1
                The seed is used to make results reproducible.
                If seed is -1, SMAC will generate a random seed.
            n_workers: int, defaults to 1
                The number of workers to use for parallelization.
                If `n_workers` is greater than 1, SMAC will use Dask to parallelize the
                optimization.
            max_ratio: float, defaults to None
                The facade uses at most scenario.n_trials * max_ratio configurations
                in the initial design. Additional configurations are not affected by
                this parameter. Not applicable to every facade.
            smac3_output_directory: Path, defaults to Path()
                The output subdirectory for the SMAC3 scenario. Defaults to the scenario
                results directory.
            timestamp: An optional timestamp for the directory name.
        """
        super().__init__(
            solver,
            instance_set,
            sparkle_objectives,
            number_of_runs,
            parent_directory,
            timestamp,
        )
        self.feature_data = feature_data
        if isinstance(self.feature_data, Path):  # Load from file
            self.feature_data = FeatureDataFrame(self.feature_data)

        # Facade parameters
        self.smac_facade = smac_facade
        if isinstance(self.smac_facade, str):
            self.smac_facade = getattr(smacfacades, self.smac_facade)
        self.max_ratio = max_ratio

        if self.feature_data is not None:
            instance_features = {
                instance: self.feature_data.get_instance(str(instance))
                for instance in self.instance_set.instance_paths
            }
        else:
            # 'If no instance features are passed, the runhistory encoder can not
            # distinguish between different instances and therefore returns the same data
            # points with different values, all of which are used to train the surrogate
            # model. Consider using instance indices as features.'
            instance_features = {
                name: [index] for index, name in enumerate(instance_set.instance_paths)
            }

        # NOTE: Patch fix; SMAC3 can handle MO, but Sparkle also passes non-user-specified
        # objectives, and not all class methods here can handle that yet
        self.sparkle_objective = sparkle_objectives[0]

        # NOTE: We do not use trial_walltime_limit to manage resources, as it relies on
        # pynisher (Python based) and our targets may not be Python; RunSolver is the
        # better option for accuracy.
        self.solver_cutoff_time = solver_cutoff_time
        if solver_calls is None:  # If solver calls is None, try to calculate it
            if self.solver_cutoff_time is not None and (cputime_limit or walltime_limit):
                if cputime_limit:
                    solver_calls = int(cputime_limit / self.solver_cutoff_time)
                elif walltime_limit:
                    solver_calls = int(walltime_limit / self.solver_cutoff_time)
            else:
                solver_calls = 100  # SMAC3 default value
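        # Illustrative example of the calculation above: with cputime_limit=14400
        # and solver_cutoff_time=60, solver_calls becomes int(14400 / 60) = 240.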
        self.smac3_output_directory = smac3_output_directory
        self.crash_cost = crash_cost
        self.termination_cost_threshold = termination_cost_threshold
        self.walltime_limit = walltime_limit
        self.cputime_limit = cputime_limit
        self.solver_calls = solver_calls
        self.use_default_config = use_default_config
        self.instance_features = instance_features
        self.min_budget = min_budget
        self.max_budget = max_budget
        self.seed = seed
        self.n_workers = n_workers
        self.smac3_scenario: Optional[SmacScenario] = None

    def create_scenario(self: SMAC3Scenario) -> None:
        """This prepares all the necessary subdirectories related to configuration."""
        super().create_scenario()
        self.log_dir.mkdir(parents=True)
        if self.smac3_scenario is None:
            self.set_smac3_scenario()
        self.create_scenario_file()

    def set_smac3_scenario(self: SMAC3Scenario) -> None:
        """Set the smac scenario object."""
        self.smac3_scenario = SmacScenario(
            configspace=self.solver.get_configuration_space(),
            name=self.name,
            output_directory=self.results_directory / self.smac3_output_directory,
            deterministic=self.solver.deterministic,
            objectives=[self.sparkle_objective.name],
            crash_cost=self.crash_cost,
            termination_cost_threshold=self.termination_cost_threshold,
            walltime_limit=self.walltime_limit,
            cputime_limit=self.cputime_limit,
            n_trials=self.solver_calls,
            use_default_config=self.use_default_config,
            instances=self.instance_set.instance_paths,
            instance_features=self.instance_features,
            min_budget=self.min_budget,
            max_budget=self.max_budget,
            seed=self.seed,
            n_workers=self.n_workers,
        )

    @property
    def log_dir(self: SMAC3Scenario) -> Path:
        """Return the path of the log directory."""
        if self.directory:
            return self.directory / "logs"
        return None

    @property
    def configurator(self: SMAC3Scenario) -> SMAC3:
        """Return the type of configurator the scenario belongs to."""
        return SMAC3

    def create_scenario_file(self: SMAC3Scenario) -> Path:
        """Create a file with the configuration scenario."""
        with self.scenario_file_path.open("w") as file:
            for key, value in self.serialise().items():
                file.write(f"{key} = {value}\n")
        return self.scenario_file_path

    def serialise(self: SMAC3Scenario) -> dict:
        """Serialise the configuration scenario."""
        feature_data = str(self.feature_data.csv_filepath) if self.feature_data else None
        return {
            "solver": self.solver.directory,
            "instance_set": self.instance_set.directory,
            "sparkle_objectives": ",".join(self.smac3_scenario.objectives),
            "solver_cutoff_time": self.solver_cutoff_time,
            "number_of_runs": self.number_of_runs,
            "smac_facade": self.smac_facade.__name__,
            "crash_cost": self.smac3_scenario.crash_cost,
            "termination_cost_threshold": self.smac3_scenario.termination_cost_threshold,
            "walltime_limit": self.smac3_scenario.walltime_limit,
            "cputime_limit": self.smac3_scenario.cputime_limit,
            "solver_calls": self.smac3_scenario.n_trials,
            "use_default_config": self.smac3_scenario.use_default_config,
            "feature_data": feature_data,
            "min_budget": self.smac3_scenario.min_budget,
            "max_budget": self.smac3_scenario.max_budget,
            "seed": self.smac3_scenario.seed,
            "n_workers": self.smac3_scenario.n_workers,
        }
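
    # Illustrative sketch of the resulting scenario file, one "key = value" line per
    # entry of serialise(); the paths and values shown here are hypothetical:
    #
    #   solver = Solvers/PbO-CCSAT-Generic
    #   instance_set = Instances/PTN
    #   sparkle_objectives = PAR10
    #   solver_cutoff_time = 60
    #   number_of_runs = 5
    #   smac_facade = AlgorithmConfigurationFacade
    #   crash_cost = inf
    #   termination_cost_threshold = inf
    #   walltime_limit = inf
    #   cputime_limit = 3600.0
    #   solver_calls = 60
    #   use_default_config = False
    #   feature_data = None
    #   min_budget = None
    #   max_budget = None
    #   seed = -1
    #   n_workers = 1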

    @staticmethod
    def from_file(scenario_file: Path, run_index: int = None) -> SMAC3Scenario:
        """Reads the scenario file and initialises a ConfigurationScenario.

        Args:
            scenario_file: Path to the scenario file.
            run_index: If given, the scenario is read with run_index as offset
                for the output directory and seed.

        Returns:
            ConfigurationScenario.
        """
        import ast

        variables = {
            keyvalue[0]: keyvalue[1].strip()
            for keyvalue in (
                line.split(" = ", maxsplit=1)
                for line in scenario_file.open().readlines()
                if line.strip() != ""
            )
        }
        variables["solver"] = Solver(Path(variables["solver"]))
        variables["instance_set"] = Instance_Set(Path(variables["instance_set"]))
        variables["sparkle_objectives"] = [
            resolve_objective(o) for o in variables["sparkle_objectives"].split(",")
        ]
        variables["parent_directory"] = scenario_file.parent.parent
        variables["solver_cutoff_time"] = int(variables["solver_cutoff_time"])
        variables["number_of_runs"] = int(variables["number_of_runs"])
        variables["smac_facade"] = getattr(smacfacades, variables["smac_facade"])

        # We need to support both lists of floats and single float (np.inf is fine)
        if variables["crash_cost"].startswith("["):
            variables["crash_cost"] = [
                float(v) for v in ast.literal_eval(variables["crash_cost"])
            ]
        else:
            variables["crash_cost"] = float(variables["crash_cost"])
        if variables["termination_cost_threshold"].startswith("["):
            variables["termination_cost_threshold"] = [
                float(v)
                for v in ast.literal_eval(variables["termination_cost_threshold"])
            ]
        else:
            variables["termination_cost_threshold"] = float(
                variables["termination_cost_threshold"]
            )

        variables["walltime_limit"] = float(variables["walltime_limit"])
        variables["cputime_limit"] = float(variables["cputime_limit"])
        variables["solver_calls"] = ast.literal_eval(variables["solver_calls"])
        variables["use_default_config"] = ast.literal_eval(
            variables["use_default_config"]
        )

        if variables["feature_data"] != "None":
            variables["feature_data"] = Path(variables["feature_data"])
        else:
            variables["feature_data"] = None

        variables["min_budget"] = ast.literal_eval(variables["min_budget"])
        variables["max_budget"] = ast.literal_eval(variables["max_budget"])

        variables["seed"] = ast.literal_eval(variables["seed"])
        variables["n_workers"] = ast.literal_eval(variables["n_workers"])
        if run_index is not None:  # Offset
            variables["seed"] += run_index
            variables["smac3_output_directory"] = Path(f"run_{run_index}")

        timestamp = scenario_file.parent.name.split("_")[-1]
        scenario = SMAC3Scenario(**variables, timestamp=timestamp)
        scenario.set_smac3_scenario()
        return scenario
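
    # Illustrative round trip (hypothetical path): a scenario file written by
    # create_scenario() can be reloaded per run, offsetting the seed and the SMAC3
    # output directory by the run index.
    #
    #   scenario = SMAC3Scenario.from_file(
    #       Path("Output/Configuration/scenario_file.txt"), run_index=2
    #   )
    #   scenario.seed                     # original seed + 2
    #   scenario.smac3_output_directory   # Path("run_2")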