Coverage for src/sparkle/configurator/implementations/smac3.py: 86%

1"""Configurator classes to implement SMAC3 in Sparkle."""

3from __future__ import annotations

4from pathlib import Path

6from smac import version as smac_version

7from smac import Scenario as SmacScenario

8from smac import facade as smacfacades

9from smac.runhistory.enumerations import StatusType as SmacStatusType

10import numpy as np

11import random

12from typing import Optional

14from runrunner import Runner, Run

16from sparkle.configurator.configurator import Configurator, ConfigurationScenario

17from sparkle.solver import Solver

18from sparkle.structures import FeatureDataFrame, PerformanceDataFrame

19from sparkle.instance import InstanceSet, Instance_Set

20from sparkle.types import SparkleObjective, resolve_objective, SolverStatus

class SMAC3(Configurator):
    """Class for SMAC3 (Python) configurator."""

    # Location of the SMAC3 wrapper directory and of the target-algorithm script
    # that is invoked once per configuration run.
    configurator_path = Path(__file__).parent.resolve() / "SMAC3"
    configurator_target = configurator_path / "smac3_target_algorithm.py"

    full_name = "Sequential Model-based Algorithm Configuration"
    version = smac_version

    def __init__(self: SMAC3) -> None:
        """Initialise the SMAC3 configurator without multi-objective support."""
        super().__init__(multi_objective_support=False)

    @property
    def name(self: SMAC3) -> str:
        """Returns the name of the configurator."""
        return SMAC3.__name__

    @staticmethod
    def scenario_class() -> ConfigurationScenario:
        """Returns the SMAC3 scenario class."""
        return SMAC3Scenario

    @staticmethod
    def check_requirements(verbose: bool = False) -> bool:
        """Check that SMAC3 is installed."""
        return True  # Is automatically installed with Sparkle

    @staticmethod
    def download_requirements() -> None:
        """Download SMAC3."""
        return  # Nothing to do

    def configure(
        self: SMAC3,
        scenario: SMAC3Scenario,
        data_target: PerformanceDataFrame,
        validate_after: bool = True,
        sbatch_options: list[str] | None = None,
        slurm_prepend: str | list[str] | Path = None,
        num_parallel_jobs: int = None,
        base_dir: Path = None,
        run_on: Runner = Runner.SLURM,
    ) -> list[Run]:
        """Start configuration job.

        Args:
            scenario: ConfigurationScenario object
            data_target: PerformanceDataFrame where to store the found configurations
            validate_after: Whether the Validator will be called after the configuration
            sbatch_options: List of slurm batch options to use. Defaults to no options.
            slurm_prepend: Slurm script to prepend to the sbatch
            num_parallel_jobs: The maximum number of jobs to run parallel.
                Defaults to the scenario's number of runs.
            base_dir: The path where the sbatch scripts will be created for Slurm.
            run_on: On which platform to run the jobs. Default: Slurm.

        Returns:
            The runs returned by the parent class's configure method.
        """
        # A fresh list per call instead of a shared mutable default argument.
        if sbatch_options is None:
            sbatch_options = []

        scenario.create_scenario()
        if (
            scenario.smac3_scenario.walltime_limit
            == scenario.smac3_scenario.cputime_limit
            == np.inf
        ):
            print("WARNING: Starting SMAC3 scenario without any time limit.")
        configuration_ids = scenario.configuration_ids

        # Scenario file also has a seed, but not for all type of configurators
        seeds = [random.randint(0, 2**32 - 1) for _ in range(scenario.number_of_runs)]
        num_parallel_jobs = num_parallel_jobs or scenario.number_of_runs
        # We do not require the configurator CLI as its already our own python wrapper
        cmds = [
            f"python3 {self.configurator_target.absolute()} "
            f"{scenario.scenario_file_path.absolute()} {configuration_id} {seed} "
            f"{data_target.csv_filepath}"
            for configuration_id, seed in zip(configuration_ids, seeds)
        ]
        return super().configure(
            configuration_commands=cmds,
            data_target=data_target,
            output=None,
            scenario=scenario,
            configuration_ids=configuration_ids,
            validate_after=validate_after,
            sbatch_options=sbatch_options,
            slurm_prepend=slurm_prepend,
            num_parallel_jobs=num_parallel_jobs,
            base_dir=base_dir,
            run_on=run_on,
        )

    @staticmethod
    def organise_output(
        output_source: Path,
        output_target: Path,
        scenario: SMAC3Scenario,
        configuration_id: str,
    ) -> None | str:
        """Method to restructure and clean up after a single configurator call.

        Args:
            output_source: JSON file with a "configs" mapping and a "data" list of
                entries carrying "config_id" and "cost".
            output_target: Passed on to Configurator.save_configuration.
            scenario: The scenario whose objective aggregates the costs.
            configuration_id: Identifier stored with the best configuration.

        Returns:
            None if the output source does not exist, otherwise the result of
            Configurator.save_configuration.
        """
        import json

        if not output_source.exists():
            print(f"SMAC3 ERROR: Output source file does not exist! [{output_source}]")
            return None
        # Context manager so the file handle is closed after parsing.
        with output_source.open("r") as source_file:
            results_dict = json.load(source_file)
        configurations = list(results_dict["configs"].values())
        config_evals = [[] for _ in configurations]
        objective = scenario.sparkle_objective
        for entry in results_dict["data"]:
            # SMAC3 configuration ids start at 1
            config_evals[entry["config_id"] - 1].append(entry["cost"])
        # Collapse each configuration's costs into a single score ...
        config_evals = [
            objective.instance_aggregator(evaluations) for evaluations in config_evals
        ]
        # ... and select the configuration whose score the solver aggregator picks.
        best_config = configurations[
            config_evals.index(objective.solver_aggregator(config_evals))
        ]
        best_config["configuration_id"] = configuration_id
        return Configurator.save_configuration(
            scenario, configuration_id, best_config, output_target
        )

    def get_status_from_logs(self: SMAC3) -> None:
        """Method to scan the log files of the configurator for warnings."""
        raise NotImplementedError

    @staticmethod
    def convert_status(status: SolverStatus) -> SmacStatusType:
        """Converts Sparkle Solver status to SMAC3 target status."""
        mapping = {
            SolverStatus.SUCCESS: SmacStatusType.SUCCESS,
            SolverStatus.CRASHED: SmacStatusType.CRASHED,
            SolverStatus.TIMEOUT: SmacStatusType.TIMEOUT,
            SolverStatus.WRONG: SmacStatusType.CRASHED,
            SolverStatus.UNKNOWN: SmacStatusType.CRASHED,
            SolverStatus.ERROR: SmacStatusType.CRASHED,
            SolverStatus.KILLED: SmacStatusType.TIMEOUT,
            SolverStatus.SAT: SmacStatusType.SUCCESS,
            SolverStatus.UNSAT: SmacStatusType.SUCCESS,
        }
        return mapping[status]

167

168

class SMAC3Scenario(ConfigurationScenario):
    """Class to handle SMAC3 configuration scenarios."""

    def __init__(
        self: SMAC3Scenario,
        solver: Solver,
        instance_set: InstanceSet,
        sparkle_objectives: list[SparkleObjective],
        number_of_runs: int,
        parent_directory: Path,
        solver_cutoff_time: int = None,
        smac_facade: smacfacades.AbstractFacade
        | str = smacfacades.AlgorithmConfigurationFacade,
        crash_cost: float | list[float] = np.inf,
        termination_cost_threshold: float | list[float] = np.inf,
        walltime_limit: float = np.inf,
        cputime_limit: float = np.inf,
        solver_calls: int = None,
        use_default_config: bool = False,
        feature_data: FeatureDataFrame | Path = None,
        min_budget: float | int | None = None,
        max_budget: float | int | None = None,
        seed: int = -1,
        n_workers: int = 1,
        max_ratio: float = None,
        smac3_output_directory: Path = Path(),
        timestamp: str = None,
    ) -> None:
        """Initialize scenario paths and names.

        Args:
            solver: Solver
                The solver to use for configuration.
            instance_set: InstanceSet
                The instance set to use for configuration.
            sparkle_objectives: list[SparkleObjective]
                The objectives to optimize. Only the first one is currently used.
            number_of_runs: int
                The number of times this scenario will be executed with different seeds.
            parent_directory: Path
                The parent directory where the configuration files will be stored.
            solver_cutoff_time: int
                Maximum CPU runtime in seconds that each solver call (trial)
                is allowed to run. Is managed by RunSolver, not pynisher.
            smac_facade: AbstractFacade, defaults to AlgorithmConfigurationFacade
                The SMAC facade to use for Optimisation. If given as a string, it
                is looked up by name in the smac facade module.
            crash_cost: float | list[float], defaults to np.inf
                Defines the cost for a failed trial. In case of multi-objective,
                each objective can be associated with a different cost.
            termination_cost_threshold: float | list[float], defaults to np.inf
                Defines a cost threshold when the optimization should stop. In case of
                multi-objective, each objective *must* be associated with a cost.
                The optimization stops when all objectives crossed the threshold.
            walltime_limit: float, defaults to np.inf
                The maximum time in seconds that SMAC is allowed to run. Only counts
                solver time.
            cputime_limit: float, defaults to np.inf
                The maximum CPU time in seconds that SMAC is allowed to run. Only counts
                solver time.
            solver_calls: int, defaults to None
                The maximum number of trials (combination of configuration, seed, budget,
                and instance, depending on the task) to run. If left as None, will be
                calculated as int(limit / solver_cutoff_time), using the CPU time
                limit if it is finite and otherwise the walltime limit. When no
                finite limit or no cutoff time is available, the SMAC3 default of
                100 is used.
            use_default_config: bool, defaults to False
                If True, the configspace's default configuration is evaluated in the
                initial design. For historic benchmark reasons, this is False by default.
                Notice, that this will result in n_configs + 1 for the initial design.
                Respecting n_trials, this will result in one fewer evaluated
                configuration in the optimization.
            feature_data: FeatureDataFrame or Path, defaults to None
                Instances can be associated with features. For example, meta data of
                the dataset (mean, var, ...) can be incorporated which are then further
                used to expand the training data of the surrogate model. If Path, loaded
                from file. When no features are given, uses index as instance features.
            min_budget: float | int | None, defaults to None
                The minimum budget (epochs, subset size, number of instances, ...) that
                is used for the optimization. Use this argument if you use multi-fidelity
                or instance optimization.
            max_budget: float | int | None, defaults to None
                The maximum budget (epochs, subset size, number of instances, ...) that
                is used for the optimization. Use this argument if you use multi-fidelity
                or instance optimization.
            seed: int, defaults to -1
                The seed is used to make results reproducible.
                If seed is -1, SMAC will generate a random seed.
            n_workers: int, defaults to 1
                The number of workers to use for parallelization.
                If `n_workers` is greater than 1, SMAC will use DASK to parallelize the
                optimization.
            max_ratio: float, defaults to None.
                Facade uses at most scenario.n_trials * max_ratio number of
                configurations in the initial design. Additional configurations are not
                affected by this parameter. Not applicable to each facade.
            smac3_output_directory: Path, defaults to Path()
                The output subdirectory for the SMAC3 scenario. Defaults to the scenario
                results directory.
            timestamp: An optional timestamp for the directory name.
        """
        super().__init__(
            solver,
            instance_set,
            sparkle_objectives,
            number_of_runs,
            parent_directory,
            timestamp,
        )
        self.feature_data = feature_data
        if isinstance(self.feature_data, Path):  # Load from file
            self.feature_data = FeatureDataFrame(self.feature_data)

        # Facade parameters
        self.smac_facade = smac_facade
        if isinstance(self.smac_facade, str):
            self.smac_facade = getattr(smacfacades, self.smac_facade)
        self.max_ratio = max_ratio

        if self.feature_data is not None:
            instance_features = {
                instance: self.feature_data.get_instance(str(instance))
                for instance in self.instance_set.instance_paths
            }
        else:
            # 'If no instance features are passed, the runhistory encoder can not
            # distinguish between different instances and therefore returns the same data
            # points with different values, all of which are used to train the surrogate
            # model. Consider using instance indices as features.'
            instance_features = {
                name: [index] for index, name in enumerate(instance_set.instance_paths)
            }

        # NOTE: Patchfix; SMAC3 can handle MO but Sparkle also gives non-user specified
        # objectives but not all class methods can handle it here yet
        self.sparkle_objective = sparkle_objectives[0]

        # NOTE: We don't use trial_walltime_limit as a way of managing resources
        # As it uses pynisher to do it (python based) and our targets are maybe not
        # RunSolver is the better option for accuracy.
        self.solver_cutoff_time = solver_cutoff_time
        if solver_calls is None:  # If solver calls is None, try to calculate it
            solver_calls = 100  # SMAC3 Default value
            if self.solver_cutoff_time:
                # Only a finite limit can be turned into a trial count: the default
                # np.inf is truthy and int(np.inf) raises OverflowError.
                for limit in (cputime_limit, walltime_limit):
                    if limit and np.isfinite(limit):
                        solver_calls = int(limit / self.solver_cutoff_time)
                        break
        self.smac3_output_directory = smac3_output_directory
        self.crash_cost = crash_cost
        self.termination_cost_threshold = termination_cost_threshold
        self.walltime_limit = walltime_limit
        self.cputime_limit = cputime_limit
        self.solver_calls = solver_calls
        self.use_default_config = use_default_config
        self.instance_features = instance_features
        self.min_budget = min_budget
        self.max_budget = max_budget
        self.seed = seed
        self.n_workers = n_workers
        # Built lazily by set_smac3_scenario()
        self.smac3_scenario: Optional[SmacScenario] = None

    def create_scenario(self: SMAC3Scenario) -> None:
        """This prepares all the necessary subdirectories related to configuration."""
        super().create_scenario()
        # exist_ok so that preparing the same scenario twice does not fail
        self.log_dir.mkdir(parents=True, exist_ok=True)
        if self.smac3_scenario is None:
            self.set_smac3_scenario()
        self.create_scenario_file()

    def set_smac3_scenario(self: SMAC3Scenario) -> None:
        """Set the smac scenario object."""
        self.smac3_scenario = SmacScenario(
            configspace=self.solver.get_configuration_space(),
            name=self.name,
            output_directory=self.results_directory / self.smac3_output_directory,
            deterministic=self.solver.deterministic,
            objectives=[self.sparkle_objective.name],
            crash_cost=self.crash_cost,
            termination_cost_threshold=self.termination_cost_threshold,
            walltime_limit=self.walltime_limit,
            cputime_limit=self.cputime_limit,
            n_trials=self.solver_calls,
            use_default_config=self.use_default_config,
            instances=self.instance_set.instance_paths,
            instance_features=self.instance_features,
            min_budget=self.min_budget,
            max_budget=self.max_budget,
            seed=self.seed,
            n_workers=self.n_workers,
        )

    @property
    def log_dir(self: SMAC3Scenario) -> Path:
        """Return the path of the log directory, or None if no directory is set."""
        if self.directory:
            return self.directory / "logs"
        return None

    @property
    def configurator(self: SMAC3Scenario) -> SMAC3:
        """Return the type of configurator the scenario belongs to."""
        return SMAC3

    def create_scenario_file(self: SMAC3Scenario) -> Path:
        """Create a file with the configuration scenario.

        Writes one "key = value" line per serialised scenario variable.

        Returns:
            Path to the written scenario file.
        """
        with self.scenario_file_path.open("w") as file:
            for key, value in self.serialise().items():
                file.write(f"{key} = {value}\n")
        return self.scenario_file_path

    def serialise(self: SMAC3Scenario) -> dict:
        """Serialize the configuration scenario.

        Requires the SMAC scenario object to be set (see set_smac3_scenario).
        """
        # Explicit None check: truthiness of a data frame object may be ambiguous.
        feature_data = (
            str(self.feature_data.csv_filepath)
            if self.feature_data is not None
            else None
        )
        # NOTE(review): max_ratio is not serialised, so it is not restored by
        # from_file - confirm whether that is intended.
        return {
            "solver": self.solver.directory,
            "instance_set": self.instance_set.directory,
            "sparkle_objectives": ",".join(self.smac3_scenario.objectives),
            "solver_cutoff_time": self.solver_cutoff_time,
            "number_of_runs": self.number_of_runs,
            "smac_facade": self.smac_facade.__name__,
            "crash_cost": self.smac3_scenario.crash_cost,
            "termination_cost_threshold": self.smac3_scenario.termination_cost_threshold,
            "walltime_limit": self.smac3_scenario.walltime_limit,
            "cputime_limit": self.smac3_scenario.cputime_limit,
            "solver_calls": self.smac3_scenario.n_trials,
            "use_default_config": self.smac3_scenario.use_default_config,
            "feature_data": feature_data,
            "min_budget": self.smac3_scenario.min_budget,
            "max_budget": self.smac3_scenario.max_budget,
            "seed": self.smac3_scenario.seed,
            "n_workers": self.smac3_scenario.n_workers,
        }

    @staticmethod
    def _parse_cost(text: str) -> float | list[float]:
        """Parse a serialised cost: a single float or a bracketed list of floats."""
        text = text.strip()
        if text.startswith("["):
            # float() instead of ast.literal_eval, which rejects 'inf' entries
            return [float(v) for v in text.strip("[]").split(",") if v.strip()]
        return float(text)

    @staticmethod
    def from_file(scenario_file: Path, run_index: int = None) -> SMAC3Scenario:
        """Reads scenario file and initalises ConfigurationScenario.

        Args:
            scenario_file: Path to scenario file.
            run_index: If given, reads as the scenario with run_index for offset
                in output directory and seed.

        Returns:
            ConfigurationScenario.
        """
        import ast

        # Each non-empty line has the form "key = value"
        variables = {}
        with scenario_file.open() as file:
            for line in file:
                if line.strip() == "":
                    continue
                keyvalue = line.split(" = ", maxsplit=1)
                variables[keyvalue[0]] = keyvalue[1].strip()

        variables["solver"] = Solver(Path(variables["solver"]))
        variables["instance_set"] = Instance_Set(Path(variables["instance_set"]))
        variables["sparkle_objectives"] = [
            resolve_objective(o) for o in variables["sparkle_objectives"].split(",")
        ]
        variables["parent_directory"] = scenario_file.parent.parent
        # The cutoff time is optional and is then written to file as 'None'
        variables["solver_cutoff_time"] = ast.literal_eval(
            variables["solver_cutoff_time"]
        )
        variables["number_of_runs"] = int(variables["number_of_runs"])
        variables["smac_facade"] = getattr(smacfacades, variables["smac_facade"])

        # We need to support both lists of floats and single float (inf included)
        variables["crash_cost"] = SMAC3Scenario._parse_cost(variables["crash_cost"])
        variables["termination_cost_threshold"] = SMAC3Scenario._parse_cost(
            variables["termination_cost_threshold"]
        )

        variables["walltime_limit"] = float(variables["walltime_limit"])
        variables["cputime_limit"] = float(variables["cputime_limit"])
        variables["solver_calls"] = ast.literal_eval(variables["solver_calls"])
        variables["use_default_config"] = ast.literal_eval(
            variables["use_default_config"]
        )

        if variables["feature_data"] != "None":
            variables["feature_data"] = Path(variables["feature_data"])
        else:
            variables["feature_data"] = None

        variables["min_budget"] = ast.literal_eval(variables["min_budget"])
        variables["max_budget"] = ast.literal_eval(variables["max_budget"])

        variables["seed"] = ast.literal_eval(variables["seed"])
        variables["n_workers"] = ast.literal_eval(variables["n_workers"])
        if run_index is not None:  # Offset
            variables["seed"] += run_index
            variables["smac3_output_directory"] = Path(f"run_{run_index}")

        # The timestamp is the last underscore-separated part of the directory name
        timestamp = scenario_file.parent.name.split("_")[-1]
        scenario = SMAC3Scenario(**variables, timestamp=timestamp)
        scenario.set_smac3_scenario()
        return scenario

Coverage for src/sparkle/configurator/implementations/smac3.py: 86%

160 statements