Coverage for sparkle/configurator/implementations/smac2.py: 74%

194 statements  

« prev     ^ index     » next       coverage.py v7.9.1, created at 2025-07-01 13:21 +0000

1"""Configurator classes to implement SMAC2 in Sparkle.""" 

2from __future__ import annotations 

3from pathlib import Path 

4import glob 

5import shutil 

6import math 

7 

8import pandas as pd 

9 

10from runrunner import Runner, Run 

11 

12from sparkle.tools.parameters import PCSConvention 

13from sparkle.configurator.configurator import Configurator, ConfigurationScenario 

14from sparkle.solver import Solver 

15from sparkle.structures import PerformanceDataFrame, FeatureDataFrame 

16from sparkle.instance import InstanceSet, Instance_Set 

17from sparkle.types import SparkleObjective, resolve_objective 

18 

19 

class SMAC2(Configurator):
    """Class for SMAC2 (Java) configurator."""
    # Installation layout: the SMAC2 distribution lives next to this module.
    configurator_path = Path(__file__).parent.resolve() / "SMAC2"
    configurator_executable = configurator_path / "smac"
    configurator_target = configurator_path / "smac2_target_algorithm.py"

    full_name = "Sequential Model-based Algorithm Configuration"
    version = "2.10.03"

    def __init__(self: SMAC2) -> None:
        """Initialise the SMAC2 configurator, Java SMAC V2.10.03."""
        # SMAC2 can only optimise a single objective at a time.
        super().__init__(multi_objective_support=False)

    @property
    def name(self: SMAC2) -> str:
        """Returns the name of the configurator."""
        return SMAC2.__name__

    @staticmethod
    def scenario_class() -> ConfigurationScenario:
        """Returns the SMAC2 scenario class."""
        return SMAC2Scenario

    @staticmethod
    def check_requirements(verbose: bool = False) -> bool:
        """Check that SMAC2 is installed.

        Args:
            verbose: Whether to emit a warning for each missing requirement.

        Returns:
            True iff both Java and the SMAC2 executable are available.
        """
        import warnings
        no_java = shutil.which("java") is None
        if no_java and verbose:
            warnings.warn(
                "SMAC2 requires Java 1.8.0_402, but Java is not installed. "
                "Please ensure Java is installed."
            )
        no_smac = not SMAC2.configurator_executable.exists()
        if no_smac and verbose:
            warnings.warn(
                "SMAC2 executable not found. Please ensure SMAC2 is installed "
                f"in the expected Path ({SMAC2.configurator_path}).")
        return not (no_java or no_smac)

    @staticmethod
    def download_requirements(
        smac2_zip_url: str = "https://github.com/ADA-research/Sparkle/raw/refs/heads/"
                             "development/Resources/Configurators/SMAC2-v2.10.03.zip"
    ) -> None:
        """Download and unpack SMAC2 into the expected configurator path.

        Args:
            smac2_zip_url: URL of the SMAC2 distribution zip archive.
        """
        if SMAC2.configurator_executable.exists():
            return  # Already installed
        from urllib.request import urlopen
        import zipfile
        import io
        # Close the HTTP response deterministically instead of relying on GC.
        with urlopen(smac2_zip_url, timeout=60) as response:
            archive = zipfile.ZipFile(io.BytesIO(response.read()))
        archive.extractall(SMAC2.configurator_path)
        # Ensure execution rights
        SMAC2.configurator_executable.chmod(0o755)

    def configure(self: SMAC2,
                  scenario: SMAC2Scenario,
                  data_target: PerformanceDataFrame,
                  validate_after: bool = True,
                  sbatch_options: list[str] = [],
                  slurm_prepend: str | list[str] | Path = None,
                  num_parallel_jobs: int = None,
                  base_dir: Path = None,
                  run_on: Runner = Runner.SLURM) -> list[Run]:
        """Start configuration job.

        Args:
            scenario: ConfigurationScenario object
            data_target: PerformanceDataFrame where to store the found configurations
            validate_after: Whether the configurations should be validated on the
                train set afterwards.
            sbatch_options: List of slurm batch options to use
            slurm_prepend: Script(s) to prepend to the sbatch script.
            num_parallel_jobs: The maximum number of jobs to run parallel.
            base_dir: The path where the sbatch scripts will be created for Slurm.
            run_on: On which platform to run the jobs. Default: Slurm.

        Returns:
            A RunRunner Run object.
        """
        scenario.create_scenario()
        configuration_ids = scenario.configuration_ids
        # TODO: Setting seeds like this is weird and should be inspected.
        # It could be good to take perhaps a seed from the scenario and use that
        # to generate a seed per run
        seeds = list(range(scenario.number_of_runs))
        output = [f"{(scenario.results_directory).absolute()}/"
                  f"{scenario.name}_{config_id}_smac.txt"
                  for config_id in configuration_ids]
        cmds = [f"python3 {Configurator.configurator_cli_path.absolute()} "
                f"{SMAC2.__name__} {output_file} {data_target.csv_filepath} "
                f"{scenario.scenario_file_path} {configuration_id} "
                f"{SMAC2.configurator_executable.absolute()} "
                f"--scenario-file {scenario.scenario_file_path} "
                f"--seed {seed} "
                for output_file, configuration_id, seed
                in zip(output, configuration_ids, seeds)]
        if num_parallel_jobs is not None:
            # num_parallel_jobs is documented as a maximum: cap the requested
            # parallelism at the number of commands instead of exceeding it.
            # (The original used max(), which overrode the caller's limit.)
            num_parallel_jobs = min(num_parallel_jobs, len(cmds))
        return super().configure(
            configuration_commands=cmds,
            data_target=data_target,
            output=output,
            num_parallel_jobs=num_parallel_jobs,
            scenario=scenario,
            configuration_ids=configuration_ids,
            validate_after=validate_after,
            sbatch_options=sbatch_options,
            slurm_prepend=slurm_prepend,
            base_dir=base_dir,
            run_on=run_on
        )

    @staticmethod
    def organise_output(output_source: Path,
                        output_target: Path,
                        scenario: SMAC2Scenario,
                        configuration_id: str) -> None | dict:
        """Retrieves configuration from SMAC file and places them in output.

        Args:
            output_source: SMAC2 log file to scan for target algorithm calls.
            output_target: Where the configuration should be stored.
            scenario: The scenario the configuration belongs to.
            configuration_id: Identifier to attach to the configuration.

        Returns:
            The result of Configurator.save_configuration, or None when no
            target algorithm call was found in the log.
        """
        call_key = SMAC2.configurator_target.name
        configuration = None
        # Last line describing a call is the best found configuration
        with output_source.open("r") as log_file:
            for line in reversed(log_file.readlines()):
                if call_key in line:
                    call_str = line.split(call_key, maxsplit=1)[1].strip()
                    # The Configuration appears after the first 7 arguments
                    configuration = call_str.split(" ", 8)[-1]
                    break
        if configuration is None:
            # No call was logged (e.g. the run crashed before solving anything)
            return None
        configuration = Solver.config_str_to_dict(configuration)
        configuration["configuration_id"] = configuration_id
        return Configurator.save_configuration(scenario, configuration_id,
                                               configuration, output_target)

    @staticmethod
    def get_smac_run_obj(objective: SparkleObjective) -> str:
        """Return the SMAC run objective based on the Performance Measure.

        Returns:
            A string that represents the run objective set in the settings.
        """
        if objective.time:
            return "RUNTIME"
        return "QUALITY"

    def get_status_from_logs(self: SMAC2, base_dir: Path) -> None:
        """Method to scan the log files of the configurator for warnings."""
        if not base_dir.exists():
            return
        print(f"Checking the log files of configurator {type(self).__name__} for "
              "warnings...")
        scenarios = [f for f in base_dir.iterdir() if f.is_dir()]
        for scenario in scenarios:
            log_dir = scenario / "outdir_train_configuration" \
                / (scenario.name + "_scenario")
            # SMAC2 writes warnings to log-warn* files; only non-empty ones matter
            non_empty = [log_file for log_file in log_dir.glob("log-warn*")
                         if log_file.stat().st_size > 0]
            if len(non_empty) > 0:
                print(f"Scenario {scenario.name} has {len(non_empty)} warning(s), see "
                      "the following log file(s) for more information:")
                for log_file in non_empty:
                    print(f"\t-{log_file}")
            else:
                print(f"Scenario {scenario.name} has no warnings.")

184 

185 

class SMAC2Scenario(ConfigurationScenario):
    """Class to handle SMAC2 configuration scenarios."""
    def __init__(self: SMAC2Scenario,
                 solver: Solver,
                 instance_set: InstanceSet,
                 sparkle_objectives: list[SparkleObjective],
                 number_of_runs: int,
                 parent_directory: Path,
                 solver_calls: int = None,
                 max_iterations: int = None,
                 cpu_time: int = None,
                 wallclock_time: int = None,
                 solver_cutoff_time: int = None,
                 target_cutoff_length: str = None,
                 cli_cores: int = None,
                 use_cpu_time_in_tunertime: bool = None,
                 feature_data: FeatureDataFrame | Path = None)\
            -> None:
        """Initialize scenario paths and names.

        Args:
            solver: Solver that should be configured.
            instance_set: Instances object for the scenario.
            sparkle_objectives: SparkleObjectives used for each run of the configuration.
                Will be simplified to the first objective.
            number_of_runs: The number of configurator runs to perform
                for configuring the solver.
            parent_directory: Directory in which the scenario should be created.
            solver_calls: The number of times the solver is called for each
                configuration run
            max_iterations: The maximum number of iterations allowed for each
                configuration run. [iteration-limit, numIterations, numberOfIterations]
            cpu_time: The time budget allocated for each configuration run. (cpu)
            wallclock_time: The time budget allocated for each configuration run.
                (wallclock)
            solver_cutoff_time: The maximum time allowed for each solver call run during
                configuration.
            target_cutoff_length: A domain specific measure of when the algorithm
                should consider itself done.
            cli_cores: int
                The number of cores to use to execute runs. Defaults in SMAC2 to 1.
            use_cpu_time_in_tunertime: Whether to calculate SMAC2's own used time for
                budget deduction. Defaults in SMAC2 to True.
            feature_data: If features are used, this contains the feature data.
                If it is a FeatureDataFrame, will convert values to SMAC2 format.
                If it is a Path, will pass the path to SMAC2.
                Defaults to None.
        """
        super().__init__(solver, instance_set, sparkle_objectives,
                         number_of_runs, parent_directory)
        self.solver = solver
        self.instance_set = instance_set

        # SMAC2 has no multi-objective support: only the first objective is used.
        self.sparkle_objective = sparkle_objectives[0]
        self.solver_calls = solver_calls
        self.cpu_time = cpu_time
        self.wallclock_time = wallclock_time
        self.solver_cutoff_time = solver_cutoff_time
        self.cutoff_length = target_cutoff_length
        self.max_iterations = max_iterations
        self.cli_cores = cli_cores
        self.use_cpu_time_in_tunertime = use_cpu_time_in_tunertime

        self.feature_data = feature_data
        self.feature_file_path = None
        # Explicit None check: DataFrame-like feature data has ambiguous truthiness.
        if self.feature_data is not None:
            if isinstance(self.feature_data, FeatureDataFrame):
                # Convert feature data to SMAC2 format
                data_dict = {}
                for instance in self.instance_set.instance_paths:
                    data_dict[str(instance)] = feature_data.get_instance(str(instance))

                self.feature_data = pd.DataFrame.from_dict(
                    data_dict, orient="index",
                    columns=[f"Feature{index+1}"
                             for index in range(feature_data.num_features)])

                def map_nan(x: object) -> float:
                    """Map non-numeric values with -512 (Pre-defined by SMAC2)."""
                    # Convert first: math.isnan raises TypeError on non-numeric
                    # input (e.g. strings), which the original did not catch.
                    try:
                        value = float(x)
                    except (TypeError, ValueError):
                        return -512.0
                    return -512.0 if math.isnan(value) else value

                self.feature_data = self.feature_data.map(map_nan)
                self.feature_file_path =\
                    self.directory / f"{self.instance_set.name}_features.csv"
            elif isinstance(self.feature_data, Path):  # Read from Path
                self.feature_file_path = feature_data
                self.feature_data = pd.read_csv(self.feature_file_path,
                                                index_col=0)
            else:
                print(f"WARNING: Feature data is of type {type(feature_data)}. "
                      "Expected FeatureDataFrame or Path.")

        # Scenario Paths
        self.instance_file_path = self.directory / f"{self.instance_set.name}.txt"

        # SMAC2 Specific
        self.outdir_train = self.directory / "outdir_train_configuration"

    @property
    def configurator(self: SMAC2Scenario) -> SMAC2:
        """Return the type of configurator the scenario belongs to."""
        return SMAC2

    def create_scenario(self: SMAC2Scenario) -> None:
        """Create scenario with solver and instances in the parent directory.

        This prepares all the necessary subdirectories related to configuration.
        """
        # Prepare scenario directory, removing any stale previous scenario
        shutil.rmtree(self.directory, ignore_errors=True)
        self.directory.mkdir(parents=True)
        # Create empty directories as needed
        self.outdir_train.mkdir()
        self.tmp.mkdir()
        self.validation.mkdir()
        self.results_directory.mkdir(parents=True)  # Prepare results directory

        self._prepare_instances()

        if self.feature_data is not None:
            self._create_feature_file()

        self.create_scenario_file()

    def create_scenario_file(
            self: SMAC2Scenario,
            configurator_target: Path = SMAC2.configurator_target,
            pcs_port: PCSConvention = PCSConvention.SMAC) -> Path:
        """Create a file with the configuration scenario.

        Writes supplementary information to the target algorithm (algo =) as:
        algo = {configurator_target} {solver_directory} {sparkle_objective}

        Returns:
            Path to the created scenario file.
        """
        super().create_scenario_file()
        with self.scenario_file_path.open("w") as file:
            file.write(f"algo = {configurator_target.absolute()} "
                       f"{self.solver.directory} {self.tmp} {self.sparkle_objective} \n"
                       f"deterministic = {1 if self.solver.deterministic else 0}\n"
                       f"run_obj = {self._get_performance_measure()}\n"
                       f"cutoffTime = {self.solver_cutoff_time}\n"
                       f"paramfile = {self.solver.get_pcs_file(pcs_port)}\n"
                       f"outdir = {self.outdir_train}\n"
                       f"instance_file = {self.instance_file_path}\n"
                       f"test_instance_file = {self.instance_file_path}\n")
            if self.cutoff_length is not None:
                file.write(f"cutoff_length = {self.cutoff_length}\n")
            if self.max_iterations is not None:
                file.write(f"iteration-limit = {self.max_iterations}\n")
            if self.wallclock_time is not None:
                file.write(f"wallclock-limit = {self.wallclock_time}\n")
            if self.cpu_time is not None:
                file.write(f"cputime-limit = {self.cpu_time}\n")
            if self.solver_calls is not None:
                file.write(f"runcount-limit = {self.solver_calls}\n")
            if self.cli_cores is not None:
                # Trailing newline was missing here, which fused this setting
                # with the next line and corrupted the scenario file.
                file.write(f"cli-cores = {self.cli_cores}\n")
            if self.feature_data is not None:
                file.write(f"feature_file = {self.feature_file_path}\n")
            if self.use_cpu_time_in_tunertime is not None:
                file.write("use-cpu-time-in-tunertime = "
                           f"{self.use_cpu_time_in_tunertime}\n")
            # We don't let SMAC do the validation
            file.write("validation = false" + "\n")
        return self.scenario_file_path

    def _prepare_instances(self: SMAC2Scenario) -> None:
        """Create instance list file without instance specifics."""
        self.instance_file_path.parent.mkdir(exist_ok=True, parents=True)
        with self.instance_file_path.open("w+") as file:
            # NOTE(review): uses the private _instance_paths while __init__ uses
            # the public instance_paths — confirm these are equivalent.
            for instance_path in self.instance_set._instance_paths:
                file.write(f"{instance_path}\n")

    def _create_feature_file(self: SMAC2Scenario) -> None:
        """Create CSV file from feature data."""
        self.feature_data.to_csv(self.feature_file_path,
                                 index_label="INSTANCE_NAME")

    def _get_performance_measure(self: SMAC2Scenario) -> str:
        """Retrieve the performance measure of the SparkleObjective.

        Returns:
            Performance measure of the sparkle objective
        """
        if self.sparkle_objective.time:
            return "RUNTIME"
        return "QUALITY"

    def serialise(self: SMAC2Scenario) -> dict:
        """Transform ConfigurationScenario to dictionary format."""
        return {
            "number_of_runs": self.number_of_runs,
            "solver_calls": self.solver_calls,
            "cpu_time": self.cpu_time,
            "wallclock_time": self.wallclock_time,
            "solver_cutoff_time": self.solver_cutoff_time,
            "cutoff_length": self.cutoff_length,
            "max_iterations": self.max_iterations,
            "sparkle_objective": self.sparkle_objective.name,
            "feature_data": self.feature_file_path,
            "use_cpu_time_in_tunertime": self.use_cpu_time_in_tunertime
        }

    @staticmethod
    def from_file(scenario_file: Path) -> SMAC2Scenario:
        """Reads scenario file and initalises SMAC2Scenario."""
        config = {keyvalue[0]: keyvalue[1]
                  for keyvalue in (line.strip().split(" = ", maxsplit=1)
                                   for line in scenario_file.open().readlines()
                                   if line.strip() != "")}

        # Collect relevant settings.
        # Keys must match what create_scenario_file writes: the original read
        # "cpu_time" / "use-cputime-in-tunertime", which are never written,
        # so those settings were silently dropped on round-trip.
        cpu_time = int(config["cputime-limit"]) if "cputime-limit" in config else None
        wallclock_limit = int(config["wallclock-limit"]) if "wallclock-limit" in config \
            else None
        solver_calls = int(config["runcount-limit"]) if "runcount-limit" in config \
            else None
        max_iterations = int(config["iteration-limit"]) if "iteration-limit" in config \
            else None
        # The writer serialises the Python bool as "True"/"False"
        use_cpu_time_in_tunertime = config["use-cpu-time-in-tunertime"] == "True" \
            if "use-cpu-time-in-tunertime" in config else None
        cli_cores = int(config["cli-cores"]) if "cli-cores" in config else None

        _, solver_path, _, objective_str = config["algo"].split(" ")
        objective = resolve_objective(objective_str)
        solver = Solver(Path(solver_path.strip()))
        # Extract the instance set from the instance file
        instance_file_path = Path(config["instance_file"])
        instance_set_path = Path(instance_file_path.open().readline().strip()).parent
        instance_set = Instance_Set(Path(instance_set_path))
        results_folder = scenario_file.parent / "results"
        state_run_dirs = [p for p in results_folder.iterdir() if p.is_file()]
        number_of_runs = len(state_run_dirs)
        feature_data_path = None
        if "feature_file" in config:
            feature_data_path = Path(config["feature_file"])
        return SMAC2Scenario(solver,
                             instance_set,
                             [objective],
                             number_of_runs,
                             instance_file_path.parent.parent,
                             solver_calls,
                             max_iterations,
                             cpu_time,
                             wallclock_limit,
                             int(config["cutoffTime"]),
                             # Optional key: only written when it was set
                             config.get("cutoff_length"),
                             cli_cores,
                             use_cpu_time_in_tunertime,
                             feature_data_path)