Coverage for sparkle/configurator/implementations/smac3.py: 78%
143 statements
« prev ^ index » next coverage.py v7.8.0, created at 2025-04-03 10:42 +0000
« prev ^ index » next coverage.py v7.8.0, created at 2025-04-03 10:42 +0000
1"""Configurator classes to implement SMAC3 in Sparkle."""
2from __future__ import annotations
3from pathlib import Path
4import shutil
6from smac import version as smac_version
7from smac import Scenario as SmacScenario
8from smac import facade as smacfacades
9from smac.runhistory.enumerations import StatusType as SmacStatusType
10import numpy as np
12from runrunner import Runner, Run
14from sparkle.configurator.configurator import Configurator, ConfigurationScenario
15from sparkle.solver import Solver
16from sparkle.structures import FeatureDataFrame, PerformanceDataFrame
17from sparkle.instance import InstanceSet, Instance_Set
18from sparkle.types import SparkleObjective, resolve_objective, SolverStatus
class SMAC3(Configurator):
    """Class for SMAC3 (Python) configurator."""

    configurator_path = Path(__file__).parent.parent.parent.resolve() /\
        "Components/smac3-v2.2.0"
    configurator_executable = configurator_path / "smac3_target_algorithm.py"

    version = smac_version
    full_name = "Sequential Model-based Algorithm Configuration"

    def __init__(self: SMAC3,
                 base_dir: Path,
                 output_path: Path) -> None:
        """Initialise the SMAC3 configurator, Python SMAC V2.2.0.

        Args:
            base_dir: The path where the configurator will be executed in.
            output_path: The path where the output will be placed. A subdirectory
                named after this class is created inside it.
        """
        output_path = output_path / SMAC3.__name__
        output_path.mkdir(parents=True, exist_ok=True)
        super().__init__(
            output_path=output_path,
            base_dir=base_dir,
            tmp_path=output_path / "tmp",
            multi_objective_support=False)

    @property
    def name(self: SMAC3) -> str:
        """Returns the name of the configurator."""
        return SMAC3.__name__

    @staticmethod
    def scenario_class() -> type[ConfigurationScenario]:
        """Returns the SMAC3 scenario class."""
        return SMAC3Scenario

    def configure(self: SMAC3,
                  scenario: SMAC3Scenario,
                  data_target: PerformanceDataFrame,
                  validate_after: bool = True,
                  sbatch_options: list[str] | None = None,
                  slurm_prepend: str | list[str] | Path = None,
                  num_parallel_jobs: int = None,
                  base_dir: Path = None,
                  run_on: Runner = Runner.SLURM) -> list[Run]:
        """Start configuration job.

        Args:
            scenario: ConfigurationScenario object
            data_target: PerformanceDataFrame where to store the found configurations
            validate_after: Whether the Validator will be called after the configuration
            sbatch_options: List of slurm batch options to use. Defaults to no
                options.
            slurm_prepend: Slurm script to prepend to the sbatch
            num_parallel_jobs: The maximum number of jobs to run parallel.
                Defaults to the number of runs of the scenario.
            base_dir: The path where the sbatch scripts will be created for Slurm.
            run_on: On which platform to run the jobs. Default: Slurm.

        Returns:
            The result of the parent class' configure call (annotated as a list
            of RunRunner Run objects).
        """
        # A fresh list per call; a literal [] default would be shared between calls
        if sbatch_options is None:
            sbatch_options = []
        if (scenario.smac3_scenario.walltime_limit
                == scenario.smac3_scenario.cputime_limit == np.inf):
            print("WARNING: Starting SMAC3 scenario without any time limit.")
        scenario.create_scenario()
        # We set the seed over the last n run ids in the dataframe
        seeds = data_target.run_ids[data_target.num_runs - scenario.number_of_runs:]
        num_parallel_jobs = num_parallel_jobs or scenario.number_of_runs
        # We do not require the configurator CLI as its already our own python wrapper
        cmds = [f"python3 {self.configurator_executable.absolute()} "
                f"{scenario.scenario_file_path.absolute()} {seed} "
                f"{data_target.csv_filepath}"
                for seed in seeds]
        return super().configure(
            configuration_commands=cmds,
            data_target=data_target,
            output=None,
            scenario=scenario,
            validation_ids=seeds if validate_after else None,
            sbatch_options=sbatch_options,
            slurm_prepend=slurm_prepend,
            num_parallel_jobs=num_parallel_jobs,
            base_dir=base_dir,
            run_on=run_on
        )

    @staticmethod
    def organise_output(output_source: Path,
                        output_target: Path,
                        scenario: SMAC3Scenario,
                        run_id: int) -> None | dict:
        """Method to restructure and clean up after a single configurator call.

        Args:
            output_source: JSON file holding the "configs" and "data" entries
                of a finished run.
            output_target: Path of the PerformanceDataFrame csv to write the best
                configuration to. If None or non-existent, nothing is written.
            scenario: The scenario that was configured.
            run_id: The run id under which the configuration is stored.

        Returns:
            The best configuration when there is no output target to write to,
            otherwise None (also when the output source does not exist).
        """
        import json
        from filelock import FileLock
        if not output_source.exists():
            print(f"SMAC3 ERROR: Output source file does not exist! [{output_source}]")
            return None
        # Close the file handle deterministically instead of leaving it to the GC
        with output_source.open("r") as results_file:
            results_dict = json.load(results_file)
        configurations = list(results_dict["configs"].values())
        config_evals = [[] for _ in configurations]
        objective = scenario.sparkle_objective
        for entry in results_dict["data"]:
            config_id, _, _, _, score, _, _, _, _, _ = entry
            # SMAC3 configuration ids start at 1
            config_evals[config_id - 1].append(score)
        # Aggregate per configuration over its evaluations, then pick the
        # configuration whose aggregate is selected by the solver aggregator
        config_evals = [objective.instance_aggregator(evaluations)
                        for evaluations in config_evals]
        best_config = configurations[
            config_evals.index(objective.solver_aggregator(config_evals))]
        if output_target is None or not output_target.exists():
            return best_config

        time_stamp = scenario.scenario_file_path.stat().st_mtime
        best_config["configuration_id"] =\
            f"{SMAC3.__name__}_{time_stamp}_{run_id}"
        instance_names = scenario.instance_set.instance_names
        lock = FileLock(f"{output_target}.lock")
        # The context manager releases the lock on exit, also when an error occurs
        with lock.acquire(timeout=60):
            performance_data = PerformanceDataFrame(output_target)
            # Resolve absolute path to Solver column
            solver = [s for s in performance_data.solvers
                      if Path(s).name == scenario.solver.name][0]
            # For some reason the instance paths in the instance set are absolute
            instances = [instance for instance in performance_data.instances
                         if Path(instance).name in instance_names]
            # We don't set the seed in the dataframe, as that should be part of the conf
            performance_data.set_value(
                value=[str(best_config)],
                solver=solver,
                instance=instances,
                objective=None,
                run=run_id,
                solver_fields=[PerformanceDataFrame.column_configuration]
            )
            performance_data.save_csv()
        return None

    def get_status_from_logs(self: SMAC3) -> None:
        """Method to scan the log files of the configurator for warnings.

        Raises:
            NotImplementedError: Always, this is not implemented for SMAC3.
        """
        raise NotImplementedError

    @staticmethod
    def convert_status(status: SolverStatus) -> SmacStatusType:
        """Converts Sparkle Solver status to SMAC3 target status.

        Raises:
            KeyError: If the status has no SMAC3 counterpart in the mapping.
        """
        mapping = {
            SolverStatus.SUCCESS: SmacStatusType.SUCCESS,
            SolverStatus.CRASHED: SmacStatusType.CRASHED,
            SolverStatus.TIMEOUT: SmacStatusType.TIMEOUT,
            SolverStatus.WRONG: SmacStatusType.CRASHED,
            SolverStatus.UNKNOWN: SmacStatusType.CRASHED,
            SolverStatus.ERROR: SmacStatusType.CRASHED,
            SolverStatus.KILLED: SmacStatusType.TIMEOUT,
            SolverStatus.SAT: SmacStatusType.SUCCESS,
            SolverStatus.UNSAT: SmacStatusType.SUCCESS
        }
        return mapping[status]
class SMAC3Scenario(ConfigurationScenario):
    """Class to handle SMAC3 configuration scenarios."""

    def __init__(self: SMAC3Scenario,
                 solver: Solver,
                 instance_set: InstanceSet,
                 sparkle_objectives: list[SparkleObjective],
                 parent_directory: Path,
                 cutoff_time: int = None,
                 number_of_runs: int = None,
                 smac_facade: smacfacades.AbstractFacade | str =
                 smacfacades.AlgorithmConfigurationFacade,
                 crash_cost: float | list[float] = np.inf,
                 termination_cost_threshold: float | list[float] = np.inf,
                 walltime_limit: float = np.inf,
                 cputime_limit: float = np.inf,
                 solver_calls: int = None,
                 use_default_config: bool = False,
                 feature_data: FeatureDataFrame | Path = None,
                 min_budget: float | int | None = None,
                 max_budget: float | int | None = None,
                 seed: int = -1,
                 n_workers: int = 1,
                 max_ratio: float = None,
                 smac3_output_directory: Path = Path(),
                 ) -> None:
        """Initialize scenario paths and names.

        Args:
            solver: Solver
                The solver to use for configuration.
            instance_set: InstanceSet
                The instance set to use for configuration. Its instance paths are
                passed to SMAC3 as the instances.
            sparkle_objectives: list[SparkleObjective]
                The objectives to optimize. Only the first one is used.
            parent_directory: Path
                The parent directory where the configuration files will be stored.
            cutoff_time: int
                Maximum CPU runtime in seconds that each solver call (trial)
                is allowed to run. Is managed by RunSolver, not pynisher.
            number_of_runs: int
                The number of times this scenario will be executed with different seeds.
            smac_facade: AbstractFacade, defaults to AlgorithmConfigurationFacade
                The SMAC facade to use for Optimisation. If given as a string, it
                is looked up by name in the smac facade module.
            crash_cost: float | list[float], defaults to np.inf
                Defines the cost for a failed trial. In case of multi-objective,
                each objective can be associated with a different cost.
            termination_cost_threshold: float | list[float], defaults to np.inf
                Defines a cost threshold when the optimization should stop. In case of
                multi-objective, each objective *must* be associated with a cost.
                The optimization stops when all objectives crossed the threshold.
            walltime_limit: float, defaults to np.inf
                The maximum time in seconds that SMAC is allowed to run. Only counts
                solver time.
            cputime_limit: float, defaults to np.inf
                The maximum CPU time in seconds that SMAC is allowed to run. Only counts
                solver time. WARNING: SMAC3 uses "runtime" (walltime) for CPU time
                when determining cputime budget.
            solver_calls: int, defaults to None
                The maximum number of trials (combination of configuration, seed, budget,
                and instance, depending on the task) to run. If left as None, will be
                calculated as int(cputime or walltime limit / cutoff time) using the
                first finite, non-zero limit. Without a cutoff time or such a limit,
                the SMAC3 default of 100 is used.
            use_default_config: bool, defaults to False
                If True, the configspace's default configuration is evaluated in the
                initial design. For historic benchmark reasons, this is False by default.
                Notice, that this will result in n_configs + 1 for the initial design.
                Respecting n_trials, this will result in one fewer evaluated
                configuration in the optimization.
            feature_data: FeatureDataFrame or Path, defaults to None
                Instances can be associated with features. For example, meta data of
                the dataset (mean, var, ...) can be incorporated which are then further
                used to expand the training data of the surrogate model. If Path, loaded
                from file. When no features are given, uses index as instance features.
            min_budget: float | int | None, defaults to None
                The minimum budget (epochs, subset size, number of instances, ...) that
                is used for the optimization. Use this argument if you use multi-fidelity
                or instance optimization.
            max_budget: float | int | None, defaults to None
                The maximum budget (epochs, subset size, number of instances, ...) that
                is used for the optimization. Use this argument if you use multi-fidelity
                or instance optimization.
            seed: int, defaults to -1
                The seed is used to make results reproducible.
                If seed is -1, SMAC will generate a random seed.
            n_workers: int, defaults to 1
                The number of workers to use for parallelization.
                If `n_workers` is greater than 1, SMAC will use DASK to parallelize the
                optimization.
            max_ratio: float, defaults to None.
                Facade uses at most scenario.n_trials * max_ratio number of
                configurations in the initial design. Additional configurations are not
                affected by this parameter. Not applicable to each facade.
            smac3_output_directory: Path, defaults to Path()
                The output subdirectory for the SMAC3 scenario. Defaults to the scenario
                results directory.
        """
        super().__init__(solver, instance_set, sparkle_objectives, parent_directory)
        # The files are saved in `./output_directory/name/seed`.
        self.log_dir = self.directory / "logs"
        self.number_of_runs = number_of_runs
        self.feature_data = feature_data
        if isinstance(self.feature_data, Path):  # Load from file
            self.feature_data = FeatureDataFrame(self.feature_data)

        # Facade parameters
        self.smac_facade = smac_facade
        if isinstance(self.smac_facade, str):
            self.smac_facade = getattr(smacfacades, self.smac_facade)
        self.max_ratio = max_ratio

        if self.feature_data is not None:
            instance_features =\
                {instance: self.feature_data.get_instance(str(instance))
                 for instance in self.instance_set.instance_paths}
        else:
            # 'If no instance features are passed, the runhistory encoder can not
            # distinguish between different instances and therefore returns the same data
            # points with different values, all of which are used to train the surrogate
            # model. Consider using instance indices as features.'
            instance_features = {name: [index] for index, name
                                 in enumerate(self.instance_set.instance_paths)}

        # NOTE: Patchfix; SMAC3 can handle MO but Sparkle also gives non-user specified
        # objectives but not all class methods can handle it here yet
        self.sparkle_objective = sparkle_objectives[0]

        # NOTE: We don't use trial_walltime_limit as a way of managing resources
        # As it uses pynisher to do it (python based) and our targets are maybe not
        # RunSolver is the better option for accuracy.
        self.cutoff_time = cutoff_time
        if solver_calls is None:  # If solver calls is None, try to calculate it
            solver_calls = 100  # SMAC3 Default value
            if self.cutoff_time is not None:
                # Use the first usable limit. Infinite limits (the defaults) must be
                # skipped: int(np.inf / cutoff_time) raises an OverflowError.
                for limit in (cputime_limit, walltime_limit):
                    if limit and np.isfinite(limit):
                        solver_calls = int(limit / self.cutoff_time)
                        break
        self.smac3_scenario = SmacScenario(
            configspace=solver.get_cs(),
            name=self.name,
            output_directory=self.results_directory / smac3_output_directory,
            deterministic=solver.deterministic,
            objectives=[self.sparkle_objective.name],
            crash_cost=crash_cost,
            termination_cost_threshold=termination_cost_threshold,
            walltime_limit=walltime_limit,
            cputime_limit=cputime_limit,
            n_trials=solver_calls,
            use_default_config=use_default_config,
            instances=instance_set.instance_paths,
            instance_features=instance_features,
            min_budget=min_budget,
            max_budget=max_budget,
            seed=seed,
            n_workers=n_workers
        )

    def create_scenario(self: SMAC3Scenario) -> None:
        """Create scenario with solver and instances in the scenario directory.

        This removes any previous scenario directory and prepares all the
        necessary subdirectories related to configuration, after which the
        scenario file is written.
        """
        shutil.rmtree(self.directory, ignore_errors=True)
        self.directory.mkdir(parents=True)
        # Create empty directories as needed
        self.results_directory.mkdir(parents=True)  # Prepare results directory
        self.log_dir.mkdir(parents=True)
        self.validation.mkdir(parents=True, exist_ok=True)
        self.create_scenario_file()

    def create_scenario_file(self: SMAC3Scenario) -> Path:
        """Create a file with the configuration scenario.

        Writes one `key = value` line per serialised scenario variable.

        Returns:
            The path of the written scenario file.
        """
        with self.scenario_file_path.open("w") as file:
            for key, value in self.serialize().items():
                file.write(f"{key} = {value}\n")
        return self.scenario_file_path

    def serialize(self: SMAC3Scenario) -> dict:
        """Serialize the configuration scenario.

        Returns:
            Dictionary mapping constructor argument names to their values.
        """
        # Compare against None explicitly, consistent with the constructor
        feature_data = (self.feature_data.csv_filepath
                        if self.feature_data is not None else None)
        return {
            "solver": self.solver.directory,
            "instance_set": self.instance_set.directory,
            "sparkle_objectives": ",".join(self.smac3_scenario.objectives),
            "cutoff_time": self.cutoff_time,
            "number_of_runs": self.number_of_runs,
            "smac_facade": self.smac_facade.__name__,
            "crash_cost": self.smac3_scenario.crash_cost,
            "termination_cost_threshold": self.smac3_scenario.termination_cost_threshold,
            "walltime_limit": self.smac3_scenario.walltime_limit,
            "cputime_limit": self.smac3_scenario.cputime_limit,
            "solver_calls": self.smac3_scenario.n_trials,
            "use_default_config": self.smac3_scenario.use_default_config,
            "feature_data": feature_data,
            "min_budget": self.smac3_scenario.min_budget,
            "max_budget": self.smac3_scenario.max_budget,
            "seed": self.smac3_scenario.seed,
            "n_workers": self.smac3_scenario.n_workers,
            # Stored so the facade parameter survives the round trip via file
            "max_ratio": self.max_ratio,
        }

    @staticmethod
    def _parse_cost(text: str) -> float | list[float]:
        """Parse a serialised cost: a single float or a bracketed list of floats.

        Parsed with float() rather than ast.literal_eval, as the latter can not
        read the "inf" that an infinite cost is written as.
        """
        if text.startswith("["):
            return [float(value) for value in text.strip("[]").split(",")
                    if value.strip()]
        return float(text)

    @staticmethod
    def _parse_optional_int(text: str) -> int | None:
        """Parse a serialised integer that is written as "None" when unset."""
        return None if text == "None" else int(text)

    @staticmethod
    def from_file(scenario_file: Path,
                  run_index: int = None) -> ConfigurationScenario:
        """Reads scenario file and initialises ConfigurationScenario.

        Args:
            scenario_file: Path to scenario file.
            run_index: If given, reads as the scenario with run_index for offset
                in output directory and seed.

        Returns:
            ConfigurationScenario.
        """
        import ast
        variables = {}
        with scenario_file.open() as file:
            for line in file:
                if line.strip() == "":
                    continue
                key, value = line.split(" = ", maxsplit=1)
                variables[key] = value.strip()
        variables["solver"] = Solver(Path(variables["solver"]))
        variables["instance_set"] = Instance_Set(Path(variables["instance_set"]))
        variables["sparkle_objectives"] = [
            resolve_objective(o)
            for o in variables["sparkle_objectives"].split(",")]
        variables["parent_directory"] = scenario_file.parent.parent
        # Both may have been serialised as None
        variables["cutoff_time"] =\
            SMAC3Scenario._parse_optional_int(variables["cutoff_time"])
        variables["number_of_runs"] =\
            SMAC3Scenario._parse_optional_int(variables["number_of_runs"])
        variables["smac_facade"] = getattr(smacfacades, variables["smac_facade"])

        # We need to support both lists of floats and single float
        variables["crash_cost"] = SMAC3Scenario._parse_cost(variables["crash_cost"])
        variables["termination_cost_threshold"] =\
            SMAC3Scenario._parse_cost(variables["termination_cost_threshold"])

        variables["walltime_limit"] = float(variables["walltime_limit"])
        variables["cputime_limit"] = float(variables["cputime_limit"])
        variables["solver_calls"] = ast.literal_eval(variables["solver_calls"])
        variables["use_default_config"] =\
            ast.literal_eval(variables["use_default_config"])

        if variables["feature_data"] != "None":
            variables["feature_data"] = Path(variables["feature_data"])
        else:
            variables["feature_data"] = None

        variables["min_budget"] = ast.literal_eval(variables["min_budget"])
        variables["max_budget"] = ast.literal_eval(variables["max_budget"])

        variables["seed"] = ast.literal_eval(variables["seed"])
        variables["n_workers"] = ast.literal_eval(variables["n_workers"])
        # Scenario files written before max_ratio was serialised lack this key
        if "max_ratio" in variables:
            variables["max_ratio"] = ast.literal_eval(variables["max_ratio"])
        if run_index is not None:  # Offset
            variables["seed"] += run_index
            variables["smac3_output_directory"] = Path(f"run_{run_index}")

        return SMAC3Scenario(**variables)