Coverage for sparkle/configurator/implementations/smac3.py: 86%

159 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-09-29 10:17 +0000

1"""Configurator classes to implement SMAC3 in Sparkle.""" 

2 

3from __future__ import annotations 

4from pathlib import Path 

5 

6from smac import version as smac_version 

7from smac import Scenario as SmacScenario 

8from smac import facade as smacfacades 

9from smac.runhistory.enumerations import StatusType as SmacStatusType 

10import numpy as np 

11import random 

12from typing import Optional 

13 

14from runrunner import Runner, Run 

15 

16from sparkle.configurator.configurator import Configurator, ConfigurationScenario 

17from sparkle.solver import Solver 

18from sparkle.structures import FeatureDataFrame, PerformanceDataFrame 

19from sparkle.instance import InstanceSet, Instance_Set 

20from sparkle.types import SparkleObjective, resolve_objective, SolverStatus 

21 

22 

class SMAC3(Configurator):
    """Class for SMAC3 (Python) configurator."""

    # Location of the Sparkle wrapper script that is executed for each configuration run
    configurator_path = Path(__file__).parent.resolve() / "SMAC3"
    configurator_target = configurator_path / "smac3_target_algorithm.py"

    full_name = "Sequential Model-based Algorithm Configuration"
    version = smac_version

    def __init__(self: SMAC3) -> None:
        """Initialise the SMAC3 configurator without multi-objective support."""
        super().__init__(multi_objective_support=False)

    @property
    def name(self: SMAC3) -> str:
        """Returns the name of the configurator."""
        return SMAC3.__name__

    @staticmethod
    def scenario_class() -> ConfigurationScenario:
        """Returns the SMAC3 scenario class."""
        return SMAC3Scenario

    @staticmethod
    def check_requirements(verbose: bool = False) -> bool:
        """Check that SMAC3 is installed."""
        return True  # Is automatically installed with Sparkle

    @staticmethod
    def download_requirements() -> None:
        """Download SMAC3."""
        return  # Nothing to do

    def configure(
        self: SMAC3,
        scenario: SMAC3Scenario,
        data_target: PerformanceDataFrame,
        validate_after: bool = True,
        sbatch_options: list[str] = None,
        slurm_prepend: str | list[str] | Path = None,
        num_parallel_jobs: int = None,
        base_dir: Path = None,
        run_on: Runner = Runner.SLURM,
    ) -> list[Run]:
        """Start configuration job.

        Args:
            scenario: ConfigurationScenario object
            data_target: PerformanceDataFrame where to store the found configurations
            validate_after: Whether the Validator will be called after the configuration
            sbatch_options: List of slurm batch options to use. Defaults to an
                empty list.
            slurm_prepend: Slurm script to prepend to the sbatch
            num_parallel_jobs: The maximum number of jobs to run parallel. Defaults
                to the number of runs of the scenario.
            base_dir: The path where the sbatch scripts will be created for Slurm.
            run_on: On which platform to run the jobs. Default: Slurm.

        Returns:
            The result of the parent configure call (annotated as a list of
            RunRunner Run objects).
        """
        # A fresh list per call avoids sharing one mutable default between calls
        if sbatch_options is None:
            sbatch_options = []
        scenario.create_scenario()
        if (
            scenario.smac3_scenario.walltime_limit
            == scenario.smac3_scenario.cputime_limit
            == np.inf
        ):
            print("WARNING: Starting SMAC3 scenario without any time limit.")
        configuration_ids = scenario.configuration_ids

        # Scenario file also has a seed, but not for all type of configurators
        seeds = [random.randint(0, 2**32 - 1) for _ in range(scenario.number_of_runs)]
        num_parallel_jobs = num_parallel_jobs or scenario.number_of_runs
        # We do not require the configurator CLI as its already our own python wrapper
        cmds = [
            f"python3 {self.configurator_target.absolute()} "
            f"{scenario.scenario_file_path.absolute()} {configuration_id} {seed} "
            f"{data_target.csv_filepath}"
            for configuration_id, seed in zip(configuration_ids, seeds)
        ]
        return super().configure(
            configuration_commands=cmds,
            data_target=data_target,
            output=None,
            scenario=scenario,
            configuration_ids=configuration_ids,
            validate_after=validate_after,
            sbatch_options=sbatch_options,
            slurm_prepend=slurm_prepend,
            num_parallel_jobs=num_parallel_jobs,
            base_dir=base_dir,
            run_on=run_on,
        )

    @staticmethod
    def organise_output(
        output_source: Path,
        output_target: Path,
        scenario: SMAC3Scenario,
        configuration_id: str,
    ) -> None | str:
        """Method to restructure and clean up after a single configurator call.

        Args:
            output_source: JSON file with a "configs" mapping and a "data" list
                whose entries hold a "config_id" and a "cost".
            output_target: Passed on to Configurator.save_configuration.
            scenario: The scenario whose objective is used to aggregate the costs.
            configuration_id: Identifier under which the best configuration is saved.

        Returns:
            The result of Configurator.save_configuration, or None when the output
            source file does not exist.
        """
        import json

        if not output_source.exists():
            print(f"SMAC3 ERROR: Output source file does not exist! [{output_source}]")
            return None
        # Context manager ensures the file handle is closed after parsing
        with output_source.open("r") as source_file:
            results_dict = json.load(source_file)
        configurations = list(results_dict["configs"].values())
        config_evals = [[] for _ in configurations]
        objective = scenario.sparkle_objective
        for entry in results_dict["data"]:
            # SMAC3 configuration ids start at 1
            config_evals[entry["config_id"] - 1].append(entry["cost"])
        # Reduce the evaluations of each configuration to a single score
        config_evals = [
            objective.instance_aggregator(evaluations) for evaluations in config_evals
        ]
        # The solver aggregator selects the best score; look up its configuration
        best_config = configurations[
            config_evals.index(objective.solver_aggregator(config_evals))
        ]
        return Configurator.save_configuration(
            scenario, configuration_id, best_config, output_target
        )

    def get_status_from_logs(self: SMAC3) -> None:
        """Method to scan the log files of the configurator for warnings."""
        raise NotImplementedError

    @staticmethod
    def convert_status(status: SolverStatus) -> SmacStatusType:
        """Converts Sparkle Solver status to SMAC3 target status."""
        mapping = {
            SolverStatus.SUCCESS: SmacStatusType.SUCCESS,
            SolverStatus.CRASHED: SmacStatusType.CRASHED,
            SolverStatus.TIMEOUT: SmacStatusType.TIMEOUT,
            SolverStatus.WRONG: SmacStatusType.CRASHED,
            SolverStatus.UNKNOWN: SmacStatusType.CRASHED,
            SolverStatus.ERROR: SmacStatusType.CRASHED,
            SolverStatus.KILLED: SmacStatusType.TIMEOUT,
            SolverStatus.SAT: SmacStatusType.SUCCESS,
            SolverStatus.UNSAT: SmacStatusType.SUCCESS,
        }
        return mapping[status]

166 

167 

class SMAC3Scenario(ConfigurationScenario):
    """Class to handle SMAC3 configuration scenarios."""

    def __init__(
        self: SMAC3Scenario,
        solver: Solver,
        instance_set: InstanceSet,
        sparkle_objectives: list[SparkleObjective],
        number_of_runs: int,
        parent_directory: Path,
        solver_cutoff_time: int = None,
        smac_facade: smacfacades.AbstractFacade
        | str = smacfacades.AlgorithmConfigurationFacade,
        crash_cost: float | list[float] = np.inf,
        termination_cost_threshold: float | list[float] = np.inf,
        walltime_limit: float = np.inf,
        cputime_limit: float = np.inf,
        solver_calls: int = None,
        use_default_config: bool = False,
        feature_data: FeatureDataFrame | Path = None,
        min_budget: float | int | None = None,
        max_budget: float | int | None = None,
        seed: int = -1,
        n_workers: int = 1,
        max_ratio: float = None,
        smac3_output_directory: Path = Path(),
        timestamp: str = None,
    ) -> None:
        """Initialize scenario paths and names.

        Args:
            solver: Solver
                The solver to use for configuration.
            instance_set: InstanceSet
                The instance set to use for configuration.
            sparkle_objectives: list[SparkleObjective]
                The objectives to optimize. Only the first objective is used.
            number_of_runs: int
                The number of times this scenario will be executed with different seeds.
            parent_directory: Path
                The parent directory where the configuration files will be stored.
            solver_cutoff_time: int
                Maximum CPU runtime in seconds that each solver call (trial)
                is allowed to run. Is managed by RunSolver, not pynisher.
            smac_facade: AbstractFacade, defaults to AlgorithmConfigurationFacade
                The SMAC facade to use for Optimisation. If given as a string, it is
                looked up by name in the smac facade module.
            crash_cost: float | list[float], defaults to np.inf
                Defines the cost for a failed trial. In case of multi-objective,
                each objective can be associated with a different cost.
            termination_cost_threshold: float | list[float], defaults to np.inf
                Defines a cost threshold when the optimization should stop. In case of
                multi-objective, each objective *must* be associated with a cost.
                The optimization stops when all objectives crossed the threshold.
            walltime_limit: float, defaults to np.inf
                The maximum time in seconds that SMAC is allowed to run. Only counts
                solver time.
            cputime_limit: float, defaults to np.inf
                The maximum CPU time in seconds that SMAC is allowed to run. Only counts
                solver time.
            solver_calls: int, defaults to None
                The maximum number of trials (combination of configuration, seed, budget,
                and instance, depending on the task) to run. If left as None, will be
                calculated as int(cputime or walltime limit / solver cutoff time) when
                a cutoff time and a finite, non-zero limit are available (the CPU
                time limit takes precedence), and is set to 100 otherwise.
            use_default_config: bool, defaults to False
                If True, the configspace's default configuration is evaluated in the
                initial design. For historic benchmark reasons, this is False by default.
                Notice, that this will result in n_configs + 1 for the initial design.
                Respecting n_trials, this will result in one fewer evaluated
                configuration in the optimization.
            feature_data: FeatureDataFrame or Path, defaults to None
                Instances can be associated with features. For example, meta data of
                the dataset (mean, var, ...) can be incorporated which are then further
                used to expand the training data of the surrogate model. If Path, loaded
                from file. When no features are given, uses index as instance features.
            min_budget: float | int | None, defaults to None
                The minimum budget (epochs, subset size, number of instances, ...) that
                is used for the optimization. Use this argument if you use multi-fidelity
                or instance optimization.
            max_budget: float | int | None, defaults to None
                The maximum budget (epochs, subset size, number of instances, ...) that
                is used for the optimization. Use this argument if you use multi-fidelity
                or instance optimization.
            seed: int, defaults to -1
                The seed is used to make results reproducible.
                If seed is -1, SMAC will generate a random seed.
            n_workers: int, defaults to 1
                The number of workers to use for parallelization.
                If `n_workers` is greater than 1, SMAC will use DASK to parallelize the
                optimization.
            max_ratio: float, defaults to None.
                Facade uses at most scenario.n_trials * max_ratio number of
                configurations in the initial design. Additional configurations are not
                affected by this parameter. Not applicable to each facade.
            smac3_output_directory: Path, defaults to Path()
                The output subdirectory for the SMAC3 scenario. Defaults to the scenario
                results directory.
            timestamp: An optional timestamp for the directory name.
        """
        super().__init__(
            solver,
            instance_set,
            sparkle_objectives,
            number_of_runs,
            parent_directory,
            timestamp,
        )
        self.feature_data = feature_data
        if isinstance(self.feature_data, Path):  # Load from file
            self.feature_data = FeatureDataFrame(self.feature_data)

        # Facade parameters
        self.smac_facade = smac_facade
        if isinstance(self.smac_facade, str):
            self.smac_facade = getattr(smacfacades, self.smac_facade)
        self.max_ratio = max_ratio

        if self.feature_data is not None:
            instance_features = {
                instance: self.feature_data.get_instance(str(instance))
                for instance in self.instance_set.instance_paths
            }
        else:
            # 'If no instance features are passed, the runhistory encoder can not
            # distinguish between different instances and therefore returns the same data
            # points with different values, all of which are used to train the surrogate
            # model. Consider using instance indices as features.'
            instance_features = {
                name: [index] for index, name in enumerate(instance_set.instance_paths)
            }

        # NOTE: Patchfix; SMAC3 can handle MO but Sparkle also gives non-user specified
        # objectives but not all class methods can handle it here yet
        self.sparkle_objective = sparkle_objectives[0]

        # NOTE: trial_walltime_limit is not used for managing resources, as it
        # relies on pynisher (Python based) while our targets may not be Python;
        # RunSolver is the better option for accuracy.
        self.solver_cutoff_time = solver_cutoff_time
        if solver_calls is None:  # If solver calls is None, try to calculate it
            solver_calls = 100  # SMAC3 Default value
            if self.solver_cutoff_time:
                # Only a finite, non-zero limit can be converted to a trial count:
                # int() of an infinite float raises OverflowError. The CPU time
                # limit takes precedence over the wall time limit.
                for limit in (cputime_limit, walltime_limit):
                    if limit and np.isfinite(limit):
                        solver_calls = int(limit / self.solver_cutoff_time)
                        break
        self.smac3_output_directory = smac3_output_directory
        self.crash_cost = crash_cost
        self.termination_cost_threshold = termination_cost_threshold
        self.walltime_limit = walltime_limit
        self.cputime_limit = cputime_limit
        self.solver_calls = solver_calls
        self.use_default_config = use_default_config
        self.instance_features = instance_features
        self.min_budget = min_budget
        self.max_budget = max_budget
        self.seed = seed
        self.n_workers = n_workers
        # Created lazily by set_smac3_scenario
        self.smac3_scenario: Optional[SmacScenario] = None

    def create_scenario(self: SMAC3Scenario) -> None:
        """This prepares all the necessary subdirectories related to configuration."""
        super().create_scenario()
        self.log_dir.mkdir(parents=True)
        if self.smac3_scenario is None:
            self.set_smac3_scenario()
        self.create_scenario_file()

    def set_smac3_scenario(self: SMAC3Scenario) -> None:
        """Set the smac scenario object."""
        self.smac3_scenario = SmacScenario(
            configspace=self.solver.get_configuration_space(),
            name=self.name,
            output_directory=self.results_directory / self.smac3_output_directory,
            deterministic=self.solver.deterministic,
            objectives=[self.sparkle_objective.name],
            crash_cost=self.crash_cost,
            termination_cost_threshold=self.termination_cost_threshold,
            walltime_limit=self.walltime_limit,
            cputime_limit=self.cputime_limit,
            n_trials=self.solver_calls,
            use_default_config=self.use_default_config,
            instances=self.instance_set.instance_paths,
            instance_features=self.instance_features,
            min_budget=self.min_budget,
            max_budget=self.max_budget,
            seed=self.seed,
            n_workers=self.n_workers,
        )

    @property
    def log_dir(self: SMAC3Scenario) -> Path | None:
        """Return the path of the log directory, or None if no directory is set."""
        if self.directory:
            return self.directory / "logs"
        return None

    @property
    def configurator(self: SMAC3Scenario) -> SMAC3:
        """Return the type of configurator the scenario belongs to."""
        return SMAC3

    def create_scenario_file(self: SMAC3Scenario) -> Path:
        """Create a file with the configuration scenario.

        Writes one "key = value" line per entry of serialise().

        Returns:
            The path of the written scenario file.
        """
        with self.scenario_file_path.open("w") as file:
            for key, value in self.serialise().items():
                file.write(f"{key} = {value}\n")
        return self.scenario_file_path

    def serialise(self: SMAC3Scenario) -> dict:
        """Serialize the configuration scenario.

        Requires the SMAC scenario object to be set (see set_smac3_scenario), as
        most values are read from it.
        """
        # Explicit None check, consistent with __init__; avoids relying on the
        # truth value of the feature data object
        feature_data = (
            str(self.feature_data.csv_filepath)
            if self.feature_data is not None
            else None
        )
        return {
            "solver": self.solver.directory,
            "instance_set": self.instance_set.directory,
            "sparkle_objectives": ",".join(self.smac3_scenario.objectives),
            "solver_cutoff_time": self.solver_cutoff_time,
            "number_of_runs": self.number_of_runs,
            "smac_facade": self.smac_facade.__name__,
            "crash_cost": self.smac3_scenario.crash_cost,
            "termination_cost_threshold": self.smac3_scenario.termination_cost_threshold,
            "walltime_limit": self.smac3_scenario.walltime_limit,
            "cputime_limit": self.smac3_scenario.cputime_limit,
            "solver_calls": self.smac3_scenario.n_trials,
            "use_default_config": self.smac3_scenario.use_default_config,
            "feature_data": feature_data,
            "min_budget": self.smac3_scenario.min_budget,
            "max_budget": self.smac3_scenario.max_budget,
            "seed": self.smac3_scenario.seed,
            "n_workers": self.smac3_scenario.n_workers,
        }

    @staticmethod
    def from_file(scenario_file: Path, run_index: int = None) -> SMAC3Scenario:
        """Reads scenario file and initalises ConfigurationScenario.

        Args:
            scenario_file: Path to scenario file.
            run_index: If given, reads as the scenario with run_index for offset
                in output directory and seed.

        Returns:
            ConfigurationScenario.
        """
        import ast

        # Context manager ensures the file handle is closed after reading
        with scenario_file.open() as file:
            lines = file.readlines()
        variables = {
            keyvalue[0]: keyvalue[1].strip()
            for keyvalue in (
                line.split(" = ", maxsplit=1) for line in lines if line.strip() != ""
            )
        }
        variables["solver"] = Solver(Path(variables["solver"]))
        variables["instance_set"] = Instance_Set(Path(variables["instance_set"]))
        variables["sparkle_objectives"] = [
            resolve_objective(o) for o in variables["sparkle_objectives"].split(",")
        ]
        variables["parent_directory"] = scenario_file.parent.parent
        # The cutoff time is optional and is written as "None" when it is not set
        if variables["solver_cutoff_time"] == "None":
            variables["solver_cutoff_time"] = None
        else:
            variables["solver_cutoff_time"] = int(variables["solver_cutoff_time"])
        variables["number_of_runs"] = int(variables["number_of_runs"])
        variables["smac_facade"] = getattr(smacfacades, variables["smac_facade"])

        # We need to support both lists of floats and single float (np.inf is fine)
        for key in ("crash_cost", "termination_cost_threshold"):
            if variables[key].startswith("["):
                variables[key] = [float(v) for v in ast.literal_eval(variables[key])]
            else:
                variables[key] = float(variables[key])

        variables["walltime_limit"] = float(variables["walltime_limit"])
        variables["cputime_limit"] = float(variables["cputime_limit"])
        variables["solver_calls"] = ast.literal_eval(variables["solver_calls"])
        variables["use_default_config"] = ast.literal_eval(
            variables["use_default_config"]
        )

        if variables["feature_data"] != "None":
            variables["feature_data"] = Path(variables["feature_data"])
        else:
            variables["feature_data"] = None

        variables["min_budget"] = ast.literal_eval(variables["min_budget"])
        variables["max_budget"] = ast.literal_eval(variables["max_budget"])

        variables["seed"] = ast.literal_eval(variables["seed"])
        variables["n_workers"] = ast.literal_eval(variables["n_workers"])
        if run_index is not None:  # Offset
            variables["seed"] += run_index
            variables["smac3_output_directory"] = Path(f"run_{run_index}")

        # The timestamp is the last underscore-separated part of the directory name
        timestamp = scenario_file.parent.name.split("_")[-1]
        scenario = SMAC3Scenario(**variables, timestamp=timestamp)
        scenario.set_smac3_scenario()
        return scenario