Coverage for sparkle/configurator/implementations/smac2.py: 72%

188 statements  

coverage.py v7.6.10, created at 2025-01-07 15:22 +0000

"""Configurator classes to implement SMAC2 in Sparkle."""
from __future__ import annotations
from pathlib import Path
import glob
import shutil
import math

import pandas as pd

from runrunner import Runner, Run

from sparkle.configurator.configurator import Configurator, ConfigurationScenario
from sparkle.solver import Solver
from sparkle.structures import PerformanceDataFrame, FeatureDataFrame
from sparkle.instance import InstanceSet, Instance_Set
from sparkle.types import SparkleObjective, resolve_objective


class SMAC2(Configurator):
    """Class for SMAC2 (Java) configurator."""
    configurator_path = Path(__file__).parent.parent.parent.resolve() /\
        "Components/smac2-v2.10.03-master-778"
    configurator_executable = configurator_path / "smac"
    configurator_target = configurator_path / "smac2_target_algorithm.py"

    version = "2.10.03"
    full_name = "Sequential Model-based Algorithm Configuration"


    def __init__(self: SMAC2,
                 base_dir: Path,
                 output_path: Path) -> None:
        """Initialise the SMAC2 configurator, Java SMAC v2.10.03.

        Args:
            base_dir: The path in which the configurator will be executed.
            output_path: The path where the output will be placed.
        """
        output_path = output_path / SMAC2.__name__
        output_path.mkdir(parents=True, exist_ok=True)
        super().__init__(
            output_path=output_path,
            base_dir=base_dir,
            tmp_path=output_path / "tmp",
            multi_objective_support=False)
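
    # A minimal usage sketch (illustrative, not part of the class): the
    # configurator is constructed with a working directory and an output
    # directory; both paths below are hypothetical.
    #
    #   configurator = SMAC2(base_dir=Path("Tmp"), output_path=Path("Output"))
    #   print(configurator.name)      # "SMAC2"
    #   print(configurator.version)   # "2.10.03"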


    @property
    def name(self: SMAC2) -> str:
        """Returns the name of the configurator."""
        return SMAC2.__name__

    @staticmethod
    def scenario_class() -> ConfigurationScenario:
        """Returns the SMAC2 scenario class."""
        return SMAC2Scenario


    def configure(self: Configurator,
                  scenario: ConfigurationScenario,
                  data_target: PerformanceDataFrame,
                  validate_after: bool = True,
                  sbatch_options: list[str] = [],
                  num_parallel_jobs: int = None,
                  base_dir: Path = None,
                  run_on: Runner = Runner.SLURM) -> list[Run]:
        """Start configuration job.

        Args:
            scenario: ConfigurationScenario object.
            data_target: PerformanceDataFrame in which the resulting
                configurations are stored.
            validate_after: Whether the configurations should be validated on the
                train set afterwards.
            sbatch_options: List of Slurm batch options to use.
            num_parallel_jobs: The maximum number of jobs to run in parallel.
            base_dir: The path where the sbatch scripts will be created for Slurm.
            run_on: On which platform to run the jobs. Default: Slurm.

        Returns:
            A list of RunRunner Run objects.
        """
        if shutil.which("java") is None:
            raise RuntimeError(
                "SMAC2 requires Java 1.8.0_402, but Java is not installed. "
                "Please ensure Java is installed and try again."
            )
        scenario.create_scenario()
        # We set the seed over the last n run ids in the dataframe
        seeds = data_target.run_ids[data_target.num_runs - scenario.number_of_runs:]
        output = [f"{(scenario.results_directory).absolute()}/"
                  f"{scenario.name}_seed_{seed}_smac.txt"
                  for seed in seeds]
        cmds = [f"python3 {Configurator.configurator_cli_path.absolute()} "
                f"{SMAC2.__name__} {output_file} {data_target.csv_filepath} "
                f"{scenario.scenario_file_path} {seed} "
                f"{SMAC2.configurator_executable.absolute()} "
                f"--scenario-file {scenario.scenario_file_path} "
                f"--seed {seed} "
                for output_file, seed in zip(output, seeds)]
        if num_parallel_jobs is not None:
            num_parallel_jobs = max(num_parallel_jobs, scenario.number_of_runs)
        return super().configure(
            configuration_commands=cmds,
            data_target=data_target,
            output=output,
            num_parallel_jobs=num_parallel_jobs,
            scenario=scenario,
            validation_ids=seeds if validate_after else None,
            sbatch_options=sbatch_options,
            base_dir=base_dir,
            run_on=run_on
        )
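
    # For illustration only: each entry of `cmds` wraps one SMAC2 run behind the
    # Sparkle configurator CLI. Using placeholders for the concrete paths, a
    # single command looks roughly like:
    #
    #   python3 <configurator_cli_path> SMAC2 <output_file> <performance_data.csv> \
    #       <scenario_file> <seed> <smac_executable> \
    #       --scenario-file <scenario_file> --seed <seed>
    #
    # i.e. the CLI records the output and performance data locations, then invokes
    # the `smac` executable with the scenario file and seed.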


    @staticmethod
    def organise_output(output_source: Path,
                        output_target: Path,
                        scenario: SMAC2Scenario,
                        run_id: int) -> None | dict:
        """Retrieve the best found configuration from the SMAC2 output file
        and store it in the output target."""
        from filelock import FileLock
        call_key = SMAC2.configurator_target.name
        # The last line describing a call is the best found configuration
        for line in reversed(output_source.open("r").readlines()):
            if call_key in line:
                call_str = line.split(call_key, maxsplit=1)[1].strip()
                # The configuration appears after the first 7 arguments
                configuration = call_str.split(" ", 8)[-1]
                break
        configuration = Solver.config_str_to_dict(configuration)
        if output_target is None or not output_target.exists():
            return configuration
        time_stamp = scenario.scenario_file_path.stat().st_mtime
        configuration["configuration_id"] =\
            f"{SMAC2.__name__}_{time_stamp}_{run_id}"
        instance_names = scenario.instance_set.instance_names
        lock = FileLock(f"{output_target}.lock")
        with lock.acquire(timeout=60):
            performance_data = PerformanceDataFrame(output_target)
            # Resolve absolute path to Solver column
            solver = [s for s in performance_data.solvers
                      if Path(s).name == scenario.solver.name][0]
            # Instance paths are absolute, so match performance data instances by name
            instances = [instance for instance in performance_data.instances
                         if Path(instance).name in instance_names]
            # We do not set the seed in the dataframe,
            # as that should be part of the configuration
            performance_data.set_value(
                value=[str(configuration)],
                solver=solver,
                instance=instances,
                objective=None,
                run=run_id,
                solver_fields=[PerformanceDataFrame.column_configuration]
            )
            performance_data.save_csv()
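
    # Illustrative sketch (hypothetical values): SMAC2's log contains calls to
    # smac2_target_algorithm.py followed by a fixed set of positional arguments
    # (instance, cutoff, seed, ...) and then the parameter configuration, e.g.
    #
    #   ... smac2_target_algorithm.py <args ...> -init_solution '1' -p_swt '0.3'
    #
    # The code above takes the last such line, strips the leading arguments and
    # passes the remaining "-param 'value'" string to Solver.config_str_to_dict,
    # which is assumed to yield something like {"init_solution": "1", "p_swt": "0.3"}.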


    @staticmethod
    def get_smac_run_obj(objective: SparkleObjective) -> str:
        """Return the SMAC2 run objective for the given SparkleObjective.

        Returns:
            A string that represents the run objective set in the settings.
        """
        if objective.time:
            return "RUNTIME"
        return "QUALITY"

    def get_status_from_logs(self: SMAC2) -> None:
        """Scan the log files of the configurator for warnings."""
        base_dir = self.output_path / "scenarios"
        if not base_dir.exists():
            return
        print(f"Checking the log files of configurator {type(self).__name__} for "
              "warnings...")
        scenarios = [f for f in base_dir.iterdir() if f.is_dir()]
        for scenario in scenarios:
            log_dir = scenario / "outdir_train_configuration" \
                / (scenario.name + "_scenario")
            warn_files = glob.glob(str(log_dir) + "/log-warn*")
            non_empty = [log_file for log_file in warn_files
                         if Path(log_file).stat().st_size > 0]
            if len(non_empty) > 0:
                print(f"Scenario {scenario.name} has {len(non_empty)} warning(s), see "
                      "the following log file(s) for more information:")
                for log_file in non_empty:
                    print(f"\t-{log_file}")
            else:
                print(f"Scenario {scenario.name} has no warnings.")



class SMAC2Scenario(ConfigurationScenario):
    """Class to handle SMAC2 configuration scenarios."""
    def __init__(self: SMAC2Scenario,
                 solver: Solver,
                 instance_set: InstanceSet,
                 sparkle_objectives: list[SparkleObjective],
                 parent_directory: Path,
                 number_of_runs: int = None,
                 solver_calls: int = None,
                 max_iterations: int = None,
                 cpu_time: int = None,
                 wallclock_time: int = None,
                 cutoff_time: int = None,
                 target_cutoff_length: str = None,
                 cli_cores: int = None,
                 use_cpu_time_in_tunertime: bool = None,
                 feature_data: FeatureDataFrame | Path = None)\
            -> None:
        """Initialize scenario paths and names.

        Args:
            solver: Solver that should be configured.
            instance_set: Instances object for the scenario.
            sparkle_objectives: SparkleObjectives used for each run of the
                configuration. Will be simplified to the first objective.
            parent_directory: Directory in which the scenario should be created.
            number_of_runs: The number of configurator runs to perform
                for configuring the solver.
            solver_calls: The number of times the solver is called for each
                configuration run.
            max_iterations: The maximum number of iterations allowed for each
                configuration run. [iteration-limit, numIterations, numberOfIterations]
            cpu_time: The CPU time budget allocated for each configuration run.
            wallclock_time: The wallclock time budget allocated for each
                configuration run.
            cutoff_time: The maximum time allowed for each individual run during
                configuration.
            target_cutoff_length: A domain specific measure of when the algorithm
                should consider itself done.
            cli_cores: The number of cores to use to execute runs. Defaults in
                SMAC2 to 1.
            use_cpu_time_in_tunertime: Whether to count SMAC2's own CPU time
                towards the budget. Defaults in SMAC2 to True.
            feature_data: If features are used, this contains the feature data.
                If it is a FeatureDataFrame, will convert values to SMAC2 format.
                If it is a Path, will pass the path to SMAC2.
                Defaults to None.
        """
        super().__init__(solver, instance_set, sparkle_objectives, parent_directory)
        self.solver = solver
        self.instance_set = instance_set
        self.name = f"{self.solver.name}_{self.instance_set.name}"

        if sparkle_objectives is not None:
            self.sparkle_objective = sparkle_objectives[0]
            if len(sparkle_objectives) > 1:
                print("WARNING: SMAC2 does not have multi-objective support. Only the "
                      f"first objective ({self.sparkle_objective}) will be optimised.")
        else:
            self.sparkle_objective = None

        self.number_of_runs = number_of_runs
        self.solver_calls = solver_calls
        self.cpu_time = cpu_time
        self.wallclock_time = wallclock_time
        self.cutoff_time = cutoff_time
        self.cutoff_length = target_cutoff_length
        self.max_iterations = max_iterations
        self.cli_cores = cli_cores
        self.use_cpu_time_in_tunertime = use_cpu_time_in_tunertime

        self.feature_data = feature_data
        self.feature_file_path = None
        if self.feature_data is not None:
            if isinstance(self.feature_data, FeatureDataFrame):
                # Convert feature data to SMAC2 format
                data_dict = {}
                for instance in self.instance_set.instance_paths:
                    data_dict[str(instance)] = feature_data.get_instance(str(instance))

                self.feature_data = pd.DataFrame.from_dict(
                    data_dict, orient="index",
                    columns=[f"Feature{index+1}"
                             for index in range(feature_data.num_features)])

                def map_nan(x: float) -> float:
                    """Map non-numeric values to -512 (pre-defined by SMAC2)."""
                    try:
                        value = float(x)
                    except (TypeError, ValueError):
                        return -512.0
                    return -512.0 if math.isnan(value) else value

                self.feature_data = self.feature_data.map(map_nan)
                self.feature_file_path =\
                    self.directory / f"{self.instance_set.name}_features.csv"
            elif isinstance(self.feature_data, Path):  # Read from Path
                self.feature_file_path = feature_data
                self.feature_data = pd.read_csv(self.feature_file_path,
                                                index_col=0)
            else:
                print(f"WARNING: Feature data is of type {type(feature_data)}. "
                      "Expected FeatureDataFrame or Path.")

        # Scenario Paths
        self.instance_file_path = self.directory / f"{self.instance_set.name}.txt"

        # SMAC2 Specific
        self.outdir_train = self.directory / "outdir_train_configuration"

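    # A minimal sketch of the feature table SMAC2 ends up reading (instance
    # paths and feature values are hypothetical). Missing or non-numeric entries
    # are replaced by SMAC2's sentinel value -512.0 by map_nan above:
    #
    #                             Feature1  Feature2
    #   Instances/SetA/p1.cnf         12.0       0.4
    #   Instances/SetA/p2.cnf       -512.0       1.7
    #
    # _create_feature_file later writes this DataFrame to CSV with the index
    # labelled INSTANCE_NAME.
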

    def create_scenario(self: SMAC2Scenario) -> None:
        """Create scenario with solver and instances in the parent directory.

        This prepares all the necessary subdirectories related to configuration.
        """
        # Prepare scenario directory
        shutil.rmtree(self.directory, ignore_errors=True)
        self.directory.mkdir(parents=True)
        # Create empty directories as needed
        self.outdir_train.mkdir()
        self.tmp.mkdir()
        self.validation.mkdir()
        self.results_directory.mkdir(parents=True)  # Prepare results directory

        self._prepare_instances()

        if self.feature_data is not None:
            self._create_feature_file()

        self.create_scenario_file()


    def create_scenario_file(self: SMAC2Scenario) -> Path:
        """Create a file with the configuration scenario.

        Writes supplementary information to the target algorithm (algo =) as:
        algo = {configurator_target} {solver_directory} {tmp_directory} {sparkle_objective}
        """
        with self.scenario_file_path.open("w") as file:
            file.write(f"algo = {SMAC2.configurator_target.absolute()} "
                       f"{self.solver.directory} {self.tmp} {self.sparkle_objective} \n"
                       f"deterministic = {1 if self.solver.deterministic else 0}\n"
                       f"run_obj = {self._get_performance_measure()}\n"
                       f"cutoffTime = {self.cutoff_time}\n"
                       f"cutoff_length = {self.cutoff_length}\n"
                       f"paramfile = {self.solver.get_pcs_file()}\n"
                       f"outdir = {self.outdir_train}\n"
                       f"instance_file = {self.instance_file_path}\n"
                       f"test_instance_file = {self.instance_file_path}\n")
            if self.max_iterations is not None:
                file.write(f"iteration-limit = {self.max_iterations}\n")
            if self.wallclock_time is not None:
                file.write(f"wallclock-limit = {self.wallclock_time}\n")
            if self.cpu_time is not None:
                file.write(f"cputime-limit = {self.cpu_time}\n")
            if self.solver_calls is not None:
                file.write(f"runcount-limit = {self.solver_calls}\n")
            if self.cli_cores is not None:
                file.write(f"cli-cores = {self.cli_cores}\n")
            if self.feature_data is not None:
                file.write(f"feature_file = {self.feature_file_path}\n")
            if self.use_cpu_time_in_tunertime is not None:
                file.write("use-cpu-time-in-tunertime = "
                           f"{self.use_cpu_time_in_tunertime}\n")
            # We do not let SMAC2 do the validation
            file.write("validation = false" + "\n")
        return self.scenario_file_path
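
    # A hypothetical example of the resulting scenario file (paths shortened,
    # solver and instance set names invented, values illustrative), assuming a
    # runtime objective, a 60 second cutoff and a 600 second wallclock budget:
    #
    #   algo = .../smac2_target_algorithm.py .../Solvers/MySolver .../tmp PAR10 
    #   deterministic = 0
    #   run_obj = RUNTIME
    #   cutoffTime = 60
    #   cutoff_length = None
    #   paramfile = .../Solvers/MySolver/params.pcs
    #   outdir = .../outdir_train_configuration
    #   instance_file = .../MyInstances.txt
    #   test_instance_file = .../MyInstances.txt
    #   wallclock-limit = 600
    #   validation = false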


    def _prepare_instances(self: SMAC2Scenario) -> None:
        """Create instance list file without instance specifics."""
        self.instance_file_path.parent.mkdir(exist_ok=True, parents=True)
        with self.instance_file_path.open("w+") as file:
            for instance_path in self.instance_set._instance_paths:
                file.write(f"{instance_path}\n")

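    # The instance file is a plain list with one instance path per line, e.g.
    # (hypothetical paths):
    #
    #   /abs/path/Instances/MyInstances/problem_01.cnf
    #   /abs/path/Instances/MyInstances/problem_02.cnf
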

    def _create_feature_file(self: SMAC2Scenario) -> None:
        """Create CSV file from feature data."""
        self.feature_data.to_csv(self.feature_file_path,
                                 index_label="INSTANCE_NAME")

    def _get_performance_measure(self: SMAC2Scenario) -> str:
        """Retrieve the performance measure of the SparkleObjective.

        Returns:
            Performance measure of the sparkle objective.
        """
        if self.sparkle_objective.time:
            return "RUNTIME"
        return "QUALITY"


    def serialize_scenario(self: SMAC2Scenario) -> dict:
        """Transform ConfigurationScenario to dictionary format."""
        return {
            "number_of_runs": self.number_of_runs,
            "solver_calls": self.solver_calls,
            "cpu_time": self.cpu_time,
            "wallclock_time": self.wallclock_time,
            "cutoff_time": self.cutoff_time,
            "cutoff_length": self.cutoff_length,
            "max_iterations": self.max_iterations,
            "sparkle_objective": self.sparkle_objective.name,
            "feature_data": self.feature_file_path,
            "use_cpu_time_in_tunertime": self.use_cpu_time_in_tunertime
        }

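    # A sketch of what serialize_scenario might return for a scenario with a
    # 600 second wallclock budget and 25 runs (all values hypothetical):
    #
    #   {"number_of_runs": 25, "solver_calls": None, "cpu_time": None,
    #    "wallclock_time": 600, "cutoff_time": 60, "cutoff_length": None,
    #    "max_iterations": None, "sparkle_objective": "PAR10",
    #    "feature_data": None, "use_cpu_time_in_tunertime": None}
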

    @staticmethod
    def from_file(scenario_file: Path) -> SMAC2Scenario:
        """Reads a scenario file and initialises a SMAC2Scenario from it."""
        config = {keyvalue[0]: keyvalue[1]
                  for keyvalue in (line.strip().split(" = ", maxsplit=1)
                                   for line in scenario_file.open().readlines()
                                   if line.strip() != "")}

        # Collect relevant settings
        cpu_time = int(config["cputime-limit"]) if "cputime-limit" in config else None
        wallclock_limit = int(config["wallclock-limit"]) if "wallclock-limit" in config \
            else None
        solver_calls = int(config["runcount-limit"]) if "runcount-limit" in config \
            else None
        max_iterations = int(config["iteration-limit"]) if "iteration-limit" in config \
            else None
        use_cpu_time_in_tunertime = config["use-cpu-time-in-tunertime"]\
            if "use-cpu-time-in-tunertime" in config else None
        cli_cores = config["cli-cores"] if "cli-cores" in config else None

        _, solver_path, _, objective_str = config["algo"].split(" ")
        objective = resolve_objective(objective_str)
        solver = Solver(Path(solver_path.strip()))
        # Extract the instance set from the instance file
        instance_file_path = Path(config["instance_file"])
        instance_set_path = Path(instance_file_path.open().readline().strip()).parent
        instance_set = Instance_Set(Path(instance_set_path))
        results_folder = scenario_file.parent / "results"
        run_result_files = [p for p in results_folder.iterdir() if p.is_file()]
        number_of_runs = len(run_result_files)
        feature_data_path = None
        if "feature_file" in config:
            feature_data_path = Path(config["feature_file"])
        return SMAC2Scenario(solver,
                             instance_set,
                             [objective],
                             instance_file_path.parent.parent,
                             number_of_runs,
                             solver_calls,
                             max_iterations,
                             cpu_time,
                             wallclock_limit,
                             int(config["cutoffTime"]),
                             config["cutoff_length"],
                             cli_cores,
                             use_cpu_time_in_tunertime,
                             feature_data_path)
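

# A rough end-to-end sketch of how these classes are typically used together
# (directory names and settings are hypothetical, and `performance_data` is
# assumed to be a PerformanceDataFrame that already contains the solver,
# instances and run ids):
#
#   scenario = SMAC2Scenario(solver, instance_set, [objective],
#                            parent_directory=Path("Output/Configuration"),
#                            number_of_runs=5, wallclock_time=600,
#                            cutoff_time=60)
#   configurator = SMAC2(base_dir=Path("Tmp"), output_path=Path("Output"))
#   runs = configurator.configure(scenario, data_target=performance_data)
#
# Each run writes its SMAC2 log under scenario.results_directory, and
# organise_output later copies the best found configuration into the
# PerformanceDataFrame.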