Coverage for src / sparkle / configurator / implementations / smac3.py: 86%

160 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-21 15:31 +0000

1"""Configurator classes to implement SMAC3 in Sparkle.""" 

2 

3from __future__ import annotations 

4from pathlib import Path 

5 

6from smac import version as smac_version 

7from smac import Scenario as SmacScenario 

8from smac import facade as smacfacades 

9from smac.runhistory.enumerations import StatusType as SmacStatusType 

10import numpy as np 

11import random 

12from typing import Optional 

13 

14from runrunner import Runner, Run 

15 

16from sparkle.configurator.configurator import Configurator, ConfigurationScenario 

17from sparkle.solver import Solver 

18from sparkle.structures import FeatureDataFrame, PerformanceDataFrame 

19from sparkle.instance import InstanceSet, Instance_Set 

20from sparkle.types import SparkleObjective, resolve_objective, SolverStatus 

21 

22 

class SMAC3(Configurator):
    """Class for SMAC3 (Python) configurator."""

    # Location of the SMAC3 wrapper directory and the target script that is
    # executed once per configuration run.
    configurator_path = Path(__file__).parent.resolve() / "SMAC3"
    configurator_target = configurator_path / "smac3_target_algorithm.py"

    full_name = "Sequential Model-based Algorithm Configuration"
    version = smac_version

    def __init__(self: SMAC3) -> None:
        """Initialise the SMAC3 configurator with multi-objective support disabled."""
        # __init__ must not return a value; just delegate to the base class.
        super().__init__(multi_objective_support=False)

    @property
    def name(self: SMAC3) -> str:
        """Returns the name of the configurator."""
        return SMAC3.__name__

    @staticmethod
    def scenario_class() -> type[ConfigurationScenario]:
        """Returns the SMAC3 scenario class (the class itself, not an instance)."""
        return SMAC3Scenario

    @staticmethod
    def check_requirements(verbose: bool = False) -> bool:
        """Check that SMAC3 is installed.

        Args:
            verbose: Unused; accepted for interface compatibility.

        Returns:
            Always True.
        """
        return True  # Is automatically installed with Sparkle

    @staticmethod
    def download_requirements() -> None:
        """Download SMAC3."""
        return  # Nothing to do

    def configure(
        self: SMAC3,
        scenario: SMAC3Scenario,
        data_target: PerformanceDataFrame,
        validate_after: bool = True,
        sbatch_options: list[str] | None = None,
        slurm_prepend: str | list[str] | Path = None,
        num_parallel_jobs: int = None,
        base_dir: Path = None,
        run_on: Runner = Runner.SLURM,
    ) -> list[Run]:
        """Start configuration job.

        Args:
            scenario: ConfigurationScenario object
            data_target: PerformanceDataFrame where to store the found configurations
            validate_after: Whether the Validator will be called after the configuration
            sbatch_options: List of slurm batch options to use. Defaults to an
                empty list.
            slurm_prepend: Slurm script to prepend to the sbatch
            num_parallel_jobs: The maximum number of jobs to run parallel.
                Defaults to the number of runs of the scenario.
            base_dir: The path where the sbatch scripts will be created for Slurm.
            run_on: On which platform to run the jobs. Default: Slurm.

        Returns:
            The value returned by Configurator.configure for the built commands.
        """
        # A fresh list per call: a mutable default would be shared between calls.
        if sbatch_options is None:
            sbatch_options = []

        scenario.create_scenario()
        if (
            scenario.smac3_scenario.walltime_limit
            == scenario.smac3_scenario.cputime_limit
            == np.inf
        ):
            print("WARNING: Starting SMAC3 scenario without any time limit.")
        configuration_ids = scenario.configuration_ids

        # Scenario file also has a seed, but not for all type of configurators
        seeds = [random.randint(0, 2**32 - 1) for _ in range(scenario.number_of_runs)]
        num_parallel_jobs = num_parallel_jobs or scenario.number_of_runs
        # We do not require the configurator CLI as its already our own python wrapper
        cmds = [
            f"python3 {self.configurator_target.absolute()} "
            f"{scenario.scenario_file_path.absolute()} {configuration_id} {seed} "
            f"{data_target.csv_filepath}"
            for configuration_id, seed in zip(configuration_ids, seeds)
        ]
        return super().configure(
            configuration_commands=cmds,
            data_target=data_target,
            output=None,
            scenario=scenario,
            configuration_ids=configuration_ids,
            validate_after=validate_after,
            sbatch_options=sbatch_options,
            slurm_prepend=slurm_prepend,
            num_parallel_jobs=num_parallel_jobs,
            base_dir=base_dir,
            run_on=run_on,
        )

    @staticmethod
    def organise_output(
        output_source: Path,
        output_target: Path,
        scenario: SMAC3Scenario,
        configuration_id: str,
    ) -> None | str:
        """Method to restructure and clean up after a single configurator call.

        Args:
            output_source: JSON file holding a "configs" mapping and a "data" list
                whose entries have a "config_id" and a "cost".
            output_target: Passed on to Configurator.save_configuration.
            scenario: The scenario whose sparkle_objective aggregates the costs.
            configuration_id: Identifier stored with the best configuration.

        Returns:
            None when output_source does not exist, otherwise the result of
            Configurator.save_configuration.
        """
        import json

        if not output_source.exists():
            print(f"SMAC3 ERROR: Output source file does not exist! [{output_source}]")
            return None
        # Context manager so the file handle is closed deterministically.
        with output_source.open("r") as source_file:
            results_dict = json.load(source_file)
        configurations = list(results_dict["configs"].values())
        config_evals = [[] for _ in configurations]
        objective = scenario.sparkle_objective
        for entry in results_dict["data"]:
            # SMAC3 configuration ids start at 1
            config_evals[entry["config_id"] - 1].append(entry["cost"])
        # Collapse the per-trial costs into a single score per configuration
        config_evals = [
            objective.instance_aggregator(evaluations) for evaluations in config_evals
        ]
        # The best configuration is the one whose score equals the aggregate
        # selected by the objective over all configurations.
        best_score = objective.solver_aggregator(config_evals)
        best_config = configurations[config_evals.index(best_score)]
        best_config["configuration_id"] = configuration_id
        return Configurator.save_configuration(
            scenario, configuration_id, best_config, output_target
        )

    def get_status_from_logs(self: SMAC3) -> None:
        """Method to scan the log files of the configurator for warnings.

        Raises:
            NotImplementedError: Always; not implemented for SMAC3.
        """
        raise NotImplementedError

    @staticmethod
    def convert_status(status: SolverStatus) -> SmacStatusType:
        """Converts Sparkle Solver status to SMAC3 target status.

        Raises:
            KeyError: If the status has no SMAC3 counterpart in the mapping.
        """
        mapping = {
            SolverStatus.SUCCESS: SmacStatusType.SUCCESS,
            SolverStatus.CRASHED: SmacStatusType.CRASHED,
            SolverStatus.TIMEOUT: SmacStatusType.TIMEOUT,
            SolverStatus.WRONG: SmacStatusType.CRASHED,
            SolverStatus.UNKNOWN: SmacStatusType.CRASHED,
            SolverStatus.ERROR: SmacStatusType.CRASHED,
            SolverStatus.KILLED: SmacStatusType.TIMEOUT,
            SolverStatus.SAT: SmacStatusType.SUCCESS,
            SolverStatus.UNSAT: SmacStatusType.SUCCESS,
        }
        return mapping[status]

167 

168 

class SMAC3Scenario(ConfigurationScenario):
    """Class to handle SMAC3 configuration scenarios."""

    def __init__(
        self: SMAC3Scenario,
        solver: Solver,
        instance_set: InstanceSet,
        sparkle_objectives: list[SparkleObjective],
        number_of_runs: int,
        parent_directory: Path,
        solver_cutoff_time: int = None,
        smac_facade: smacfacades.AbstractFacade
        | str = smacfacades.AlgorithmConfigurationFacade,
        crash_cost: float | list[float] = np.inf,
        termination_cost_threshold: float | list[float] = np.inf,
        walltime_limit: float = np.inf,
        cputime_limit: float = np.inf,
        solver_calls: int = None,
        use_default_config: bool = False,
        feature_data: FeatureDataFrame | Path = None,
        min_budget: float | int | None = None,
        max_budget: float | int | None = None,
        seed: int = -1,
        n_workers: int = 1,
        max_ratio: float = None,
        smac3_output_directory: Path = Path(),
        timestamp: str = None,
    ) -> None:
        """Initialize scenario paths and names.

        Args:
            solver: Solver
                The solver to use for configuration.
            instance_set: InstanceSet
                The instance set to use for configuration.
            sparkle_objectives: list[SparkleObjective]
                The objectives to optimize. Only the first one is used.
            number_of_runs: int
                The number of times this scenario will be executed with different seeds.
            parent_directory: Path
                The parent directory where the configuration files will be stored.
            solver_cutoff_time: int
                Maximum CPU runtime in seconds that each solver call (trial)
                is allowed to run. Is managed by RunSolver, not pynisher.
            smac_facade: AbstractFacade, defaults to AlgorithmConfigurationFacade
                The SMAC facade to use for Optimisation. If a string, it is looked
                up by name in the smac facade module.
            crash_cost: float | list[float], defaults to np.inf
                Defines the cost for a failed trial. In case of multi-objective,
                each objective can be associated with a different cost.
            termination_cost_threshold: float | list[float], defaults to np.inf
                Defines a cost threshold when the optimization should stop. In case of
                multi-objective, each objective *must* be associated with a cost.
                The optimization stops when all objectives crossed the threshold.
            walltime_limit: float, defaults to np.inf
                The maximum time in seconds that SMAC is allowed to run. Only counts
                solver time.
            cputime_limit: float, defaults to np.inf
                The maximum CPU time in seconds that SMAC is allowed to run. Only counts
                solver time.
            solver_calls: int, defaults to None
                The maximum number of trials (combination of configuration, seed, budget,
                and instance, depending on the task) to run. If left as None, will be
                calculated as int(limit / solver_cutoff_time), where limit is the
                cputime limit if it is finite and non-zero, else the walltime limit.
                When no such limit or no cutoff time is available, 100 is used.
            use_default_config: bool, defaults to False
                If True, the configspace's default configuration is evaluated in the
                initial design. For historic benchmark reasons, this is False by default.
                Notice, that this will result in n_configs + 1 for the initial design.
                Respecting n_trials, this will result in one fewer evaluated
                configuration in the optimization.
            feature_data: FeatureDataFrame or Path, defaults to None
                Instances can be associated with features. For example, meta data of
                the dataset (mean, var, ...) can be incorporated which are then further
                used to expand the training data of the surrogate model. If Path, loaded
                from file. When no features are given, uses index as instance features.
            min_budget: float | int | None, defaults to None
                The minimum budget (epochs, subset size, number of instances, ...) that
                is used for the optimization. Use this argument if you use multi-fidelity
                or instance optimization.
            max_budget: float | int | None, defaults to None
                The maximum budget (epochs, subset size, number of instances, ...) that
                is used for the optimization. Use this argument if you use multi-fidelity
                or instance optimization.
            seed: int, defaults to -1
                The seed is used to make results reproducible.
                If seed is -1, SMAC will generate a random seed.
            n_workers: int, defaults to 1
                The number of workers to use for parallelization.
                If `n_workers` is greater than 1, SMAC will use DASK to parallelize the
                optimization.
            max_ratio: float, defaults to None.
                Facade uses at most scenario.n_trials * max_ratio number of
                configurations in the initial design. Additional configurations are not
                affected by this parameter. Not applicable to each facade.
            smac3_output_directory: Path, defaults to Path()
                The output subdirectory for the SMAC3 scenario. Defaults to the scenario
                results directory.
            timestamp: An optional timestamp for the directory name.
        """
        super().__init__(
            solver,
            instance_set,
            sparkle_objectives,
            number_of_runs,
            parent_directory,
            timestamp,
        )
        self.feature_data = feature_data
        if isinstance(self.feature_data, Path):  # Load from file
            self.feature_data = FeatureDataFrame(self.feature_data)

        # Facade parameters
        self.smac_facade = smac_facade
        if isinstance(self.smac_facade, str):
            self.smac_facade = getattr(smacfacades, self.smac_facade)
        self.max_ratio = max_ratio

        if self.feature_data is not None:
            instance_features = {
                instance: self.feature_data.get_instance(str(instance))
                for instance in self.instance_set.instance_paths
            }
        else:
            # 'If no instance features are passed, the runhistory encoder can not
            # distinguish between different instances and therefore returns the same data
            # points with different values, all of which are used to train the surrogate
            # model. Consider using instance indices as features.'
            instance_features = {
                name: [index] for index, name in enumerate(instance_set.instance_paths)
            }

        # NOTE: Patchfix; SMAC3 can handle MO but Sparkle also gives non-user specified
        # objectives but not all class methods can handle it here yet
        self.sparkle_objective = sparkle_objectives[0]

        # NOTE: We don't use trial_walltime_limit as a way of managing resources,
        # as it relies on pynisher (python based) while our targets may not be
        # python. RunSolver is the better option for accuracy.
        self.solver_cutoff_time = solver_cutoff_time
        if solver_calls is None:  # If solver calls is None, try to calculate it
            # Only a finite, non-zero limit yields a usable trial count:
            # int(np.inf / cutoff) raises OverflowError, and np.inf is the default.
            usable_limit = next(
                (
                    limit
                    for limit in (cputime_limit, walltime_limit)
                    if limit and np.isfinite(limit)
                ),
                None,
            )
            if self.solver_cutoff_time and usable_limit is not None:
                solver_calls = int(usable_limit / self.solver_cutoff_time)
            else:
                solver_calls = 100  # SMAC3 Default value
        self.smac3_output_directory = smac3_output_directory
        self.crash_cost = crash_cost
        self.termination_cost_threshold = termination_cost_threshold
        self.walltime_limit = walltime_limit
        self.cputime_limit = cputime_limit
        self.solver_calls = solver_calls
        self.use_default_config = use_default_config
        self.instance_features = instance_features
        self.min_budget = min_budget
        self.max_budget = max_budget
        self.seed = seed
        self.n_workers = n_workers
        # Built lazily by set_smac3_scenario()
        self.smac3_scenario: Optional[SmacScenario] = None

    def create_scenario(self: SMAC3Scenario) -> None:
        """This prepares all the necessary subdirectories related to configuration."""
        super().create_scenario()
        # exist_ok so that preparing the same scenario twice does not fail
        self.log_dir.mkdir(parents=True, exist_ok=True)
        if self.smac3_scenario is None:
            self.set_smac3_scenario()
        self.create_scenario_file()

    def set_smac3_scenario(self: SMAC3Scenario) -> None:
        """Set the smac scenario object from the attributes of this scenario."""
        self.smac3_scenario = SmacScenario(
            configspace=self.solver.get_configuration_space(),
            name=self.name,
            output_directory=self.results_directory / self.smac3_output_directory,
            deterministic=self.solver.deterministic,
            objectives=[self.sparkle_objective.name],
            crash_cost=self.crash_cost,
            termination_cost_threshold=self.termination_cost_threshold,
            walltime_limit=self.walltime_limit,
            cputime_limit=self.cputime_limit,
            n_trials=self.solver_calls,
            use_default_config=self.use_default_config,
            instances=self.instance_set.instance_paths,
            instance_features=self.instance_features,
            min_budget=self.min_budget,
            max_budget=self.max_budget,
            seed=self.seed,
            n_workers=self.n_workers,
        )

    @property
    def log_dir(self: SMAC3Scenario) -> Path | None:
        """Return the path of the log directory, or None without a directory."""
        if self.directory:
            return self.directory / "logs"
        return None

    @property
    def configurator(self: SMAC3Scenario) -> type[SMAC3]:
        """Return the type of configurator the scenario belongs to."""
        return SMAC3

    def create_scenario_file(self: SMAC3Scenario) -> Path:
        """Create a file with the configuration scenario.

        Writes one "key = value" line per serialised entry.

        Returns:
            The path of the written scenario file.
        """
        with self.scenario_file_path.open("w") as file:
            file.writelines(
                f"{key} = {value}\n" for key, value in self.serialise().items()
            )
        return self.scenario_file_path

    def serialise(self: SMAC3Scenario) -> dict:
        """Serialize the configuration scenario.

        Requires the SMAC scenario object to be set (see set_smac3_scenario).
        """
        # NOTE(review): max_ratio and smac3_output_directory are not part of the
        # serialised scenario, so from_file cannot restore them - confirm intended.
        # Explicit None check, matching __init__, instead of object truthiness.
        feature_data = (
            str(self.feature_data.csv_filepath)
            if self.feature_data is not None
            else None
        )
        return {
            "solver": self.solver.directory,
            "instance_set": self.instance_set.directory,
            "sparkle_objectives": ",".join(self.smac3_scenario.objectives),
            "solver_cutoff_time": self.solver_cutoff_time,
            "number_of_runs": self.number_of_runs,
            "smac_facade": self.smac_facade.__name__,
            "crash_cost": self.smac3_scenario.crash_cost,
            "termination_cost_threshold": self.smac3_scenario.termination_cost_threshold,
            "walltime_limit": self.smac3_scenario.walltime_limit,
            "cputime_limit": self.smac3_scenario.cputime_limit,
            "solver_calls": self.smac3_scenario.n_trials,
            "use_default_config": self.smac3_scenario.use_default_config,
            "feature_data": feature_data,
            "min_budget": self.smac3_scenario.min_budget,
            "max_budget": self.smac3_scenario.max_budget,
            "seed": self.smac3_scenario.seed,
            "n_workers": self.smac3_scenario.n_workers,
        }

    @staticmethod
    def _parse_cost(text: str) -> float | list[float]:
        """Parse a serialised cost: a single float or a bracketed list of floats."""
        text = text.strip()
        if text.startswith("["):
            # Parsed by hand because ast.literal_eval rejects "inf" list elements.
            return [float(v) for v in text.strip("[]").split(",") if v.strip()]
        return float(text)

    @staticmethod
    def from_file(scenario_file: Path, run_index: int = None) -> SMAC3Scenario:
        """Reads scenario file and initalises ConfigurationScenario.

        Args:
            scenario_file: Path to scenario file.
            run_index: If given, reads as the scenario with run_index for offset
                in output directory and seed.

        Returns:
            ConfigurationScenario.
        """
        import ast

        # Each non-empty line has the form "key = value"
        with scenario_file.open() as file:
            variables = {
                keyvalue[0]: keyvalue[1].strip()
                for keyvalue in (
                    line.split(" = ", maxsplit=1) for line in file if line.strip() != ""
                )
            }
        variables["solver"] = Solver(Path(variables["solver"]))
        variables["instance_set"] = Instance_Set(Path(variables["instance_set"]))
        variables["sparkle_objectives"] = [
            resolve_objective(o) for o in variables["sparkle_objectives"].split(",")
        ]
        variables["parent_directory"] = scenario_file.parent.parent
        # The cutoff time is optional and is then serialised as "None"
        cutoff_time = ast.literal_eval(variables["solver_cutoff_time"])
        variables["solver_cutoff_time"] = (
            None if cutoff_time is None else int(cutoff_time)
        )
        variables["number_of_runs"] = int(variables["number_of_runs"])
        variables["smac_facade"] = getattr(smacfacades, variables["smac_facade"])

        # We need to support both lists of floats and single float (inf included)
        variables["crash_cost"] = SMAC3Scenario._parse_cost(variables["crash_cost"])
        variables["termination_cost_threshold"] = SMAC3Scenario._parse_cost(
            variables["termination_cost_threshold"]
        )

        variables["walltime_limit"] = float(variables["walltime_limit"])
        variables["cputime_limit"] = float(variables["cputime_limit"])
        variables["solver_calls"] = ast.literal_eval(variables["solver_calls"])
        variables["use_default_config"] = ast.literal_eval(
            variables["use_default_config"]
        )

        if variables["feature_data"] != "None":
            variables["feature_data"] = Path(variables["feature_data"])
        else:
            variables["feature_data"] = None

        variables["min_budget"] = ast.literal_eval(variables["min_budget"])
        variables["max_budget"] = ast.literal_eval(variables["max_budget"])

        variables["seed"] = ast.literal_eval(variables["seed"])
        variables["n_workers"] = ast.literal_eval(variables["n_workers"])
        if run_index is not None:  # Offset
            variables["seed"] += run_index
            variables["smac3_output_directory"] = Path(f"run_{run_index}")

        # The timestamp is the last "_"-separated part of the scenario directory name
        timestamp = scenario_file.parent.name.split("_")[-1]
        scenario = SMAC3Scenario(**variables, timestamp=timestamp)
        scenario.set_smac3_scenario()
        return scenario
476 return scenario