Coverage for sparkle/configurator/implementations/smac3.py: 84%

139 statements  

« prev     ^ index     » next       coverage.py v7.9.1, created at 2025-07-01 13:21 +0000

1"""Configurator classes to implement SMAC3 in Sparkle.""" 

2from __future__ import annotations 

3from pathlib import Path 

4import shutil 

5 

6from smac import version as smac_version 

7from smac import Scenario as SmacScenario 

8from smac import facade as smacfacades 

9from smac.runhistory.enumerations import StatusType as SmacStatusType 

10import numpy as np 

11 

12from runrunner import Runner, Run 

13 

14from sparkle.configurator.configurator import Configurator, ConfigurationScenario 

15from sparkle.solver import Solver 

16from sparkle.structures import FeatureDataFrame, PerformanceDataFrame 

17from sparkle.instance import InstanceSet, Instance_Set 

18from sparkle.types import SparkleObjective, resolve_objective, SolverStatus 

19 

20 

class SMAC3(Configurator):
    """Class for SMAC3 (Python) configurator."""

    # Location of the SMAC3 wrapper directory and the target algorithm script
    # that is called once per configuration run.
    configurator_path = Path(__file__).parent.resolve() / "SMAC3"
    configurator_target = configurator_path / "smac3_target_algorithm.py"

    full_name = "Sequential Model-based Algorithm Configuration"
    version = smac_version

    def __init__(self: SMAC3) -> None:
        """Initialise the SMAC3 configurator without multi-objective support."""
        super().__init__(multi_objective_support=False)

    @property
    def name(self: SMAC3) -> str:
        """Returns the name of the configurator."""
        return SMAC3.__name__

    @staticmethod
    def scenario_class() -> type[ConfigurationScenario]:
        """Returns the SMAC3 scenario class."""
        return SMAC3Scenario

    @staticmethod
    def check_requirements(verbose: bool = False) -> bool:
        """Check that SMAC3 is installed."""
        return True  # Is automatically installed with Sparkle

    @staticmethod
    def download_requirements() -> None:
        """Download SMAC3."""
        return  # Nothing to do

    def configure(self: SMAC3,
                  scenario: SMAC3Scenario,
                  data_target: PerformanceDataFrame,
                  validate_after: bool = True,
                  sbatch_options: list[str] | None = None,
                  slurm_prepend: str | list[str] | Path = None,
                  num_parallel_jobs: int = None,
                  base_dir: Path = None,
                  run_on: Runner = Runner.SLURM) -> list[Run]:
        """Start configuration job.

        Args:
            scenario: ConfigurationScenario object
            data_target: PerformanceDataFrame where to store the found configurations
            validate_after: Whether the Validator will be called after the configuration
            sbatch_options: List of slurm batch options to use. Defaults to an
                empty list.
            slurm_prepend: Slurm script to prepend to the sbatch
            num_parallel_jobs: The maximum number of jobs to run parallel.
                Defaults to the number of runs of the scenario.
            base_dir: The path where the sbatch scripts will be created for Slurm.
            run_on: On which platform to run the jobs. Default: Slurm.

        Returns:
            The result of Configurator.configure (annotated as a list of
            RunRunner Run objects).
        """
        # Use a fresh list per call instead of a shared mutable default.
        if sbatch_options is None:
            sbatch_options = []
        if (scenario.smac3_scenario.walltime_limit
                == scenario.smac3_scenario.cputime_limit == np.inf):
            print("WARNING: Starting SMAC3 scenario without any time limit.")
        scenario.create_scenario()
        configuration_ids = scenario.configuration_ids
        # TODO: Setting seeds like this is weird and should be inspected.
        # It could be good to take perhaps a seed from the scenario and use that
        # to generate a seed per run
        seeds = list(range(scenario.number_of_runs))
        num_parallel_jobs = num_parallel_jobs or scenario.number_of_runs
        # We do not require the configurator CLI as its already our own python wrapper
        cmds = [f"python3 {self.configurator_target.absolute()} "
                f"{scenario.scenario_file_path.absolute()} {configuration_id} {seed} "
                f"{data_target.csv_filepath}"
                for configuration_id, seed in zip(configuration_ids, seeds)]
        return super().configure(
            configuration_commands=cmds,
            data_target=data_target,
            output=None,
            scenario=scenario,
            configuration_ids=configuration_ids,
            validate_after=validate_after,
            sbatch_options=sbatch_options,
            slurm_prepend=slurm_prepend,
            num_parallel_jobs=num_parallel_jobs,
            base_dir=base_dir,
            run_on=run_on
        )

    @staticmethod
    def organise_output(output_source: Path,
                        output_target: Path,
                        scenario: SMAC3Scenario,
                        configuration_id: str) -> None | str:
        """Method to restructure and clean up after a single configurator call.

        Reads the JSON file written by SMAC3, aggregates the recorded costs per
        configuration with the scenario objective and stores the best one.

        Args:
            output_source: JSON file with the keys "configs" and "data".
            output_target: Passed on to Configurator.save_configuration.
            scenario: The scenario that was run, supplies the objective.
            configuration_id: Passed on to Configurator.save_configuration.

        Returns:
            None if the output source does not exist, otherwise the result of
            Configurator.save_configuration.
        """
        import json
        if not output_source.exists():
            print(f"SMAC3 ERROR: Output source file does not exist! [{output_source}]")
            return None
        # Close the file handle deterministically after parsing
        with output_source.open("r") as source_file:
            results_dict = json.load(source_file)
        configurations = list(results_dict["configs"].values())
        config_evals = [[] for _ in configurations]
        objective = scenario.sparkle_objective
        for entry in results_dict["data"]:
            smac_conf_id = entry["config_id"]
            score = entry["cost"]
            # SMAC3 configuration ids start at 1
            config_evals[smac_conf_id - 1].append(score)
        # Collapse the evaluations of each configuration into a single value
        config_evals = [objective.instance_aggregator(evaluations)
                        for evaluations in config_evals]
        # The solver aggregator selects the winning value; its position
        # identifies the matching configuration.
        best_config = configurations[
            config_evals.index(objective.solver_aggregator(config_evals))]
        return Configurator.save_configuration(scenario, configuration_id,
                                               best_config, output_target)

    def get_status_from_logs(self: SMAC3) -> None:
        """Method to scan the log files of the configurator for warnings."""
        raise NotImplementedError

    @staticmethod
    def convert_status(status: SolverStatus) -> SmacStatusType:
        """Converts Sparkle Solver status to SMAC3 target status.

        Raises:
            KeyError: If the status has no entry in the mapping.
        """
        mapping = {
            SolverStatus.SUCCESS: SmacStatusType.SUCCESS,
            SolverStatus.CRASHED: SmacStatusType.CRASHED,
            SolverStatus.TIMEOUT: SmacStatusType.TIMEOUT,
            SolverStatus.WRONG: SmacStatusType.CRASHED,
            SolverStatus.UNKNOWN: SmacStatusType.CRASHED,
            SolverStatus.ERROR: SmacStatusType.CRASHED,
            SolverStatus.KILLED: SmacStatusType.TIMEOUT,
            SolverStatus.SAT: SmacStatusType.SUCCESS,
            SolverStatus.UNSAT: SmacStatusType.SUCCESS
        }
        return mapping[status]

151 

152 

class SMAC3Scenario(ConfigurationScenario):
    """Class to handle SMAC3 configuration scenarios."""

    def __init__(self: SMAC3Scenario,
                 solver: Solver,
                 instance_set: InstanceSet,
                 sparkle_objectives: list[SparkleObjective],
                 number_of_runs: int,
                 parent_directory: Path,
                 solver_cutoff_time: int = None,
                 smac_facade: smacfacades.AbstractFacade | str =
                 smacfacades.AlgorithmConfigurationFacade,
                 crash_cost: float | list[float] = np.inf,
                 termination_cost_threshold: float | list[float] = np.inf,
                 walltime_limit: float = np.inf,
                 cputime_limit: float = np.inf,
                 solver_calls: int = None,
                 use_default_config: bool = False,
                 feature_data: FeatureDataFrame | Path = None,
                 min_budget: float | int | None = None,
                 max_budget: float | int | None = None,
                 seed: int = -1,
                 n_workers: int = 1,
                 max_ratio: float = None,
                 smac3_output_directory: Path = Path(),
                 ) -> None:
        """Initialize scenario paths and names.

        Args:
            solver: Solver
                The solver to use for configuration.
            instance_set: InstanceSet
                The instance set to use for configuration. Its instance paths are
                passed to SMAC3 as the instances.
            sparkle_objectives: list[SparkleObjective]
                The objectives to optimize. Only the first one is used.
            number_of_runs: int
                The number of times this scenario will be executed with different seeds.
            parent_directory: Path
                The parent directory where the configuration files will be stored.
            solver_cutoff_time: int
                Maximum CPU runtime in seconds that each solver call (trial)
                is allowed to run. Is managed by RunSolver, not pynisher.
            smac_facade: AbstractFacade, defaults to AlgorithmConfigurationFacade
                The SMAC facade to use for Optimisation. If given as a string, it is
                looked up by name in the smac facade module.
            crash_cost: float | list[float], defaults to np.inf
                Defines the cost for a failed trial. In case of multi-objective,
                each objective can be associated with a different cost.
            termination_cost_threshold: float | list[float], defaults to np.inf
                Defines a cost threshold when the optimization should stop. In case of
                multi-objective, each objective *must* be associated with a cost.
                The optimization stops when all objectives crossed the threshold.
            walltime_limit: float, defaults to np.inf
                The maximum wallclock time in seconds that SMAC is allowed to run.
            cputime_limit: float, defaults to np.inf
                The maximum CPU time in seconds that SMAC is allowed to run.
            solver_calls: int, defaults to None
                The maximum number of trials (combination of configuration, seed, budget,
                and instance, depending on the task) to run. If left as None, will be
                calculated as int(cputime or walltime limit / solver cutoff time)
                when the cutoff time and a finite, positive limit are available,
                and is set to 100 otherwise.
            use_default_config: bool, defaults to False
                If True, the configspace's default configuration is evaluated in the
                initial design. For historic benchmark reasons, this is False by default.
                Notice, that this will result in n_configs + 1 for the initial design.
                Respecting n_trials, this will result in one fewer evaluated
                configuration in the optimization.
            feature_data: FeatureDataFrame or Path, defaults to None
                Instances can be associated with features. For example, meta data of
                the dataset (mean, var, ...) can be incorporated which are then further
                used to expand the training data of the surrogate model. If Path, loaded
                from file. When no features are given, uses index as instance features.
            min_budget: float | int | None, defaults to None
                The minimum budget (epochs, subset size, number of instances, ...) that
                is used for the optimization. Use this argument if you use multi-fidelity
                or instance optimization.
            max_budget: float | int | None, defaults to None
                The maximum budget (epochs, subset size, number of instances, ...) that
                is used for the optimization. Use this argument if you use multi-fidelity
                or instance optimization.
            seed: int, defaults to -1
                The seed is used to make results reproducible.
                If seed is -1, SMAC will generate a random seed.
            n_workers: int, defaults to 1
                The number of workers to use for parallelization.
                If `n_workers` is greater than 1, SMAC will use DASK to parallelize the
                optimization.
            max_ratio: float, defaults to None.
                Facade uses at most scenario.n_trials * max_ratio number of
                configurations in the initial design. Additional configurations are not
                affected by this parameter. Not applicable to each facade.
            smac3_output_directory: Path, defaults to Path()
                The output subdirectory for the SMAC3 scenario. Defaults to the scenario
                results directory.
        """
        super().__init__(solver, instance_set, sparkle_objectives,
                         number_of_runs, parent_directory)
        # The files are saved in `./output_directory/name/seed`.
        self.log_dir = self.directory / "logs"
        self.feature_data = feature_data
        if isinstance(self.feature_data, Path):  # Load from file
            self.feature_data = FeatureDataFrame(self.feature_data)

        # Facade parameters
        self.smac_facade = smac_facade
        if isinstance(self.smac_facade, str):
            self.smac_facade = getattr(smacfacades, self.smac_facade)
        # NOTE(review): max_ratio is stored here but is not written by serialise(),
        # so it is not restored by from_file() - confirm whether that is intended.
        self.max_ratio = max_ratio

        if self.feature_data is not None:
            instance_features =\
                {instance: self.feature_data.get_instance(str(instance))
                 for instance in self.instance_set.instance_paths}
        else:
            # 'If no instance features are passed, the runhistory encoder can not
            # distinguish between different instances and therefore returns the same data
            # points with different values, all of which are used to train the surrogate
            # model. Consider using instance indices as features.'
            instance_features = {name: [index] for index, name
                                 in enumerate(instance_set.instance_paths)}

        # NOTE: Patchfix; SMAC3 can handle MO but Sparkle also gives non-user specified
        # objectives but not all class methods can handle it here yet
        self.sparkle_objective = sparkle_objectives[0]

        # NOTE: We don't use trial_walltime_limit as a way of managing resources
        # As it uses pynisher to do it (python based) and our targets are maybe not
        # RunSolver is the better option for accuracy.
        self.solver_cutoff_time = solver_cutoff_time
        if solver_calls is None:  # If solver calls is None, try to calculate it
            solver_calls = 100  # SMAC3 Default value
            if self.solver_cutoff_time:
                # Prefer the CPU time limit over the walltime limit. Only finite,
                # positive limits are usable: int(np.inf / cutoff) raises an
                # OverflowError, and np.inf is the default of both limits.
                for limit in (cputime_limit, walltime_limit):
                    if limit is not None and 0 < limit < np.inf:
                        solver_calls = int(limit / self.solver_cutoff_time)
                        break
        self.smac3_scenario = SmacScenario(
            configspace=solver.get_configuration_space(),
            name=self.name,
            output_directory=self.results_directory / smac3_output_directory,
            deterministic=solver.deterministic,
            objectives=[self.sparkle_objective.name],
            crash_cost=crash_cost,
            termination_cost_threshold=termination_cost_threshold,
            walltime_limit=walltime_limit,
            cputime_limit=cputime_limit,
            n_trials=solver_calls,
            use_default_config=use_default_config,
            instances=instance_set.instance_paths,
            instance_features=instance_features,
            min_budget=min_budget,
            max_budget=max_budget,
            seed=seed,
            n_workers=n_workers
        )

    def create_scenario(self: SMAC3Scenario) -> None:
        """Create the scenario directory structure and the scenario file.

        Removes any existing scenario directory, recreates it together with the
        results, log and validation directories and writes the scenario file.
        """
        shutil.rmtree(self.directory, ignore_errors=True)
        self.directory.mkdir(parents=True)
        # Create empty directories as needed
        self.results_directory.mkdir(parents=True)  # Prepare results directory
        self.log_dir.mkdir(parents=True)
        self.validation.mkdir(parents=True, exist_ok=True)
        self.create_scenario_file()

    @property
    def configurator(self: SMAC3Scenario) -> SMAC3:
        """Return the type of configurator the scenario belongs to."""
        return SMAC3

    def create_scenario_file(self: SMAC3Scenario) -> Path:
        """Create a file with the configuration scenario.

        Writes one `key = value` line per entry of serialise().

        Returns:
            Path to the written scenario file.
        """
        super().create_scenario_file()
        with self.scenario_file_path.open("w") as file:
            for key, value in self.serialise().items():
                file.write(f"{key} = {value}\n")
        return self.scenario_file_path

    def serialise(self: SMAC3Scenario) -> dict:
        """Serialize the configuration scenario.

        Returns:
            Dictionary with the values needed by from_file to rebuild the scenario.
        """
        # Compare against None explicitly, as done in __init__, instead of relying
        # on the truthiness of the feature data object.
        feature_data = None
        if self.feature_data is not None:
            feature_data = self.feature_data.csv_filepath
        return {
            "solver": self.solver.directory,
            "instance_set": self.instance_set.directory,
            "sparkle_objectives": ",".join(self.smac3_scenario.objectives),
            "solver_cutoff_time": self.solver_cutoff_time,
            "number_of_runs": self.number_of_runs,
            "smac_facade": self.smac_facade.__name__,
            "crash_cost": self.smac3_scenario.crash_cost,
            "termination_cost_threshold": self.smac3_scenario.termination_cost_threshold,
            "walltime_limit": self.smac3_scenario.walltime_limit,
            "cputime_limit": self.smac3_scenario.cputime_limit,
            "solver_calls": self.smac3_scenario.n_trials,
            "use_default_config": self.smac3_scenario.use_default_config,
            "feature_data": feature_data,
            "min_budget": self.smac3_scenario.min_budget,
            "max_budget": self.smac3_scenario.max_budget,
            "seed": self.smac3_scenario.seed,
            "n_workers": self.smac3_scenario.n_workers,
        }

    @staticmethod
    def from_file(scenario_file: Path,
                  run_index: int = None) -> SMAC3Scenario:
        """Reads scenario file and initialises ConfigurationScenario.

        Args:
            scenario_file: Path to scenario file.
            run_index: If given, reads as the scenario with run_index for offset
                in output directory and seed.

        Returns:
            ConfigurationScenario.
        """
        import ast

        def parse_cost(text: str) -> float | list[float]:
            """Parse a single float or a bracketed list of floats."""
            if text.startswith("["):
                # Parse the elements with float() rather than ast.literal_eval,
                # which rejects non-literals such as `inf`.
                return [float(value) for value in text.strip("[]").split(",")
                        if value.strip()]
            return float(text)

        variables = {}
        # Read inside a context manager so the file handle is closed
        with scenario_file.open() as file:
            for line in file:
                if line.strip() == "":
                    continue
                key, value = line.split(" = ", maxsplit=1)
                variables[key] = value.strip()
        variables["solver"] = Solver(Path(variables["solver"]))
        variables["instance_set"] = Instance_Set(Path(variables["instance_set"]))
        variables["sparkle_objectives"] = [
            resolve_objective(o)
            for o in variables["sparkle_objectives"].split(",")]
        variables["parent_directory"] = scenario_file.parent.parent
        # The cutoff time is optional and is then serialised as "None"
        cutoff_time = ast.literal_eval(variables["solver_cutoff_time"])
        variables["solver_cutoff_time"] =\
            None if cutoff_time is None else int(cutoff_time)
        variables["number_of_runs"] = int(variables["number_of_runs"])
        variables["smac_facade"] = getattr(smacfacades, variables["smac_facade"])

        # We need to support both lists of floats and single float (np.inf is fine)
        variables["crash_cost"] = parse_cost(variables["crash_cost"])
        variables["termination_cost_threshold"] =\
            parse_cost(variables["termination_cost_threshold"])

        variables["walltime_limit"] = float(variables["walltime_limit"])
        variables["cputime_limit"] = float(variables["cputime_limit"])
        variables["solver_calls"] = ast.literal_eval(variables["solver_calls"])
        variables["use_default_config"] =\
            ast.literal_eval(variables["use_default_config"])

        if variables["feature_data"] != "None":
            variables["feature_data"] = Path(variables["feature_data"])
        else:
            variables["feature_data"] = None

        variables["min_budget"] = ast.literal_eval(variables["min_budget"])
        variables["max_budget"] = ast.literal_eval(variables["max_budget"])

        variables["seed"] = ast.literal_eval(variables["seed"])
        variables["n_workers"] = ast.literal_eval(variables["n_workers"])
        if run_index is not None:  # Offset
            # NOTE(review): with the default seed of -1 this yields -1 + run_index;
            # confirm that offsetting the "random seed" marker is intended.
            variables["seed"] += run_index
            variables["smac3_output_directory"] = Path(f"run_{run_index}")

        return SMAC3Scenario(**variables)