Coverage for src/sparkle/configurator/implementations/smac2.py: 73%

204 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-10-15 14:11 +0000

1"""Configurator classes to implement SMAC2 in Sparkle.""" 

2 

3from __future__ import annotations 

4from pathlib import Path 

5import shutil 

6import math 

7import random 

8 

9import pandas as pd 

10 

11from runrunner import Runner, Run 

12 

13from sparkle.tools.parameters import PCSConvention 

14from sparkle.configurator.configurator import Configurator, ConfigurationScenario 

15from sparkle.solver import Solver 

16from sparkle.structures import PerformanceDataFrame, FeatureDataFrame 

17from sparkle.instance import InstanceSet, Instance_Set 

18from sparkle.types import SparkleObjective, resolve_objective 

19 

20 

class SMAC2(Configurator):
    """Class for SMAC2 (Java) configurator."""

    # Locations of the SMAC2 distribution, resolved relative to this module
    configurator_path = Path(__file__).parent.resolve() / "SMAC2"
    configurator_executable = configurator_path / "smac"
    configurator_target = configurator_path / "smac2_target_algorithm.py"

    full_name = "Sequential Model-based Algorithm Configuration"
    version = "2.10.03"

    def __init__(self: SMAC2) -> None:
        """Initialise the SMAC2 configurator, Java SMAC V2.10.03."""
        # SMAC2 is registered without multi-objective support.
        super().__init__(multi_objective_support=False)

    @property
    def name(self: SMAC2) -> str:
        """Returns the name of the configurator."""
        return SMAC2.__name__

    @staticmethod
    def scenario_class() -> type[ConfigurationScenario]:
        """Returns the SMAC2 scenario class (the class itself, not an instance)."""
        return SMAC2Scenario

    @staticmethod
    def check_requirements(verbose: bool = False) -> bool:
        """Check that SMAC2 is installed.

        Args:
            verbose: If True, emit a warning for every missing requirement.

        Returns:
            True if a ``java`` executable is found on the PATH and the SMAC2
            executable exists at its expected location, False otherwise.
        """
        import warnings

        no_java = shutil.which("java") is None
        if no_java and verbose:
            warnings.warn(
                "SMAC2 requires Java 1.8.0_402, but Java is not installed. "
                "Please ensure Java is installed."
            )
        no_smac = not SMAC2.configurator_executable.exists()
        if no_smac and verbose:
            warnings.warn(
                "SMAC2 executable not found. Please ensure SMAC2 is installed "
                f"in the expected Path ({SMAC2.configurator_path})."
            )
        return not (no_java or no_smac)

    @staticmethod
    def download_requirements(
        smac2_zip_url: str = "https://github.com/ADA-research/Sparkle/raw/refs/heads/"
        "development/Resources/Configurators/SMAC2-v2.10.03.zip",
    ) -> None:
        """Download SMAC2 and unpack it into the configurator directory.

        Does nothing when the SMAC2 executable already exists.

        Args:
            smac2_zip_url: URL of the zip archive containing SMAC2.
        """
        if SMAC2.configurator_executable.exists():
            return  # Already installed
        from urllib.request import urlopen
        import zipfile
        import io

        # Context managers make sure the connection and the archive are closed,
        # also when reading or extracting fails half way.
        with urlopen(smac2_zip_url, timeout=60) as response:
            archive_bytes = response.read()
        # NOTE(review): the archive comes from a caller-supplied URL; only point
        # this at trusted sources, as its members are extracted as-is.
        with zipfile.ZipFile(io.BytesIO(archive_bytes)) as archive:
            archive.extractall(SMAC2.configurator_path)
        # Ensure execution rights
        SMAC2.configurator_executable.chmod(0o755)

    def configure(
        self: SMAC2,
        scenario: SMAC2Scenario,
        data_target: PerformanceDataFrame,
        validate_after: bool = True,
        sbatch_options: list[str] | None = None,
        slurm_prepend: str | list[str] | Path = None,
        num_parallel_jobs: int = None,
        base_dir: Path = None,
        run_on: Runner = Runner.SLURM,
    ) -> list[Run]:
        """Start configuration job.

        Args:
            scenario: ConfigurationScenario object
            data_target: PerformanceDataFrame where to store the found configurations
            validate_after: Whether the configurations should be validated on the
                train set afterwards.
            sbatch_options: List of slurm batch options to use. Defaults to no
                extra options.
            slurm_prepend: Slurm script to prepend to the sbatch.
            num_parallel_jobs: The maximum number of jobs to run parallel. Capped
                at the number of configuration commands.
            base_dir: The path where the sbatch scripts will be created for Slurm.
            run_on: On which platform to run the jobs. Default: Slurm.

        Returns:
            The list of RunRunner Run objects as returned by the parent class.
        """
        # A fresh list per call: a mutable default would be shared between calls
        sbatch_options = [] if sbatch_options is None else sbatch_options

        scenario.create_scenario()
        configuration_ids = scenario.configuration_ids

        # The maximum seed size for SMAC2 is 999 999 999
        seeds = [random.randint(0, 10**9 - 1) for _ in range(scenario.number_of_runs)]
        # One output file per configuration id, inside the results directory
        output = [
            f"{(scenario.results_directory).absolute()}/"
            f"{scenario.name}_{config_id}_smac.txt"
            for config_id in configuration_ids
        ]
        # One command per (output file, configuration id, seed) triple
        cmds = [
            f"python3 {Configurator.configurator_cli_path.absolute()} "
            f"{SMAC2.__name__} {output_file} {data_target.csv_filepath} "
            f"{scenario.scenario_file_path} {configuration_id} "
            f"{SMAC2.configurator_executable.absolute()} "
            f"--scenario-file {scenario.scenario_file_path} "
            f"--seed {seed} "
            for output_file, configuration_id, seed in zip(
                output, configuration_ids, seeds
            )
        ]
        if num_parallel_jobs is not None:
            # The requested value is an upper bound: never raise it, only cap it
            # at the number of commands that actually exist.
            num_parallel_jobs = min(num_parallel_jobs, len(cmds))
        return super().configure(
            configuration_commands=cmds,
            data_target=data_target,
            output=output,
            num_parallel_jobs=num_parallel_jobs,
            scenario=scenario,
            configuration_ids=configuration_ids,
            validate_after=validate_after,
            sbatch_options=sbatch_options,
            slurm_prepend=slurm_prepend,
            base_dir=base_dir,
            run_on=run_on,
        )

    @staticmethod
    def organise_output(
        output_source: Path,
        output_target: Path,
        scenario: SMAC2Scenario,
        configuration_id: str,
    ) -> None | dict:
        """Retrieves configuration from SMAC file and places them in output.

        Args:
            output_source: The SMAC2 output file to read.
            output_target: Target that is handed to Configurator.save_configuration.
            scenario: The scenario the configuration belongs to.
            configuration_id: Identifier stored with the configuration.

        Returns:
            The result of Configurator.save_configuration.

        Raises:
            ValueError: If the output file contains no call to the target algorithm.
        """
        call_key = SMAC2.configurator_target.name
        with output_source.open("r") as output_file:
            lines = output_file.readlines()

        configuration_str = None
        # Last line describing a call is the best found configuration
        for line in reversed(lines):
            if call_key in line:
                call_str = line.split(call_key, maxsplit=1)[1].strip()
                # Everything after the first 8 space separated arguments of the
                # call is treated as the configuration
                configuration_str = call_str.split(" ", 8)[-1]
                break
        if configuration_str is None:
            raise ValueError(
                f"No call to '{call_key}' found in SMAC2 output file {output_source}."
            )

        configuration = Solver.config_str_to_dict(configuration_str)
        configuration["configuration_id"] = configuration_id
        return Configurator.save_configuration(
            scenario, configuration_id, configuration, output_target
        )

    @staticmethod
    def get_smac_run_obj(objective: SparkleObjective) -> str:
        """Return the SMAC run objective based on the Performance Measure.

        Args:
            objective: The objective to translate.

        Returns:
            "RUNTIME" if the objective is time based, "QUALITY" otherwise.
        """
        if objective.time:
            return "RUNTIME"
        return "QUALITY"

    def get_status_from_logs(self: SMAC2, base_dir: Path) -> None:
        """Method to scan the log files of the configurator for warnings.

        Prints, for every scenario directory in base_dir, whether non empty
        warning logs were found. Does nothing if base_dir does not exist.

        Args:
            base_dir: Directory containing one sub directory per scenario.
        """
        if not base_dir.exists():
            return
        print(
            f"Checking the log files of configurator {type(self).__name__} for "
            "warnings..."
        )
        scenarios = [f for f in base_dir.iterdir() if f.is_dir()]
        for scenario in scenarios:
            log_dir = (
                scenario / "outdir_train_configuration" / (scenario.name + "_scenario")
            )
            # Collect all non empty log files paths
            warn_files = [f for f in log_dir.glob("log-warn*") if f.stat().st_size > 0]
            if len(warn_files) > 0:
                print(
                    f"Scenario {scenario.name} has {len(warn_files)} warning(s), see "
                    "the following log file(s) for more information:"
                )
                for log_file in warn_files:
                    print(f"\t-{log_file}")
            else:
                print(f"Scenario {scenario.name} has no warnings.")

203 

204 

class SMAC2Scenario(ConfigurationScenario):
    """Class to handle SMAC2 configuration scenarios."""

    def __init__(
        self: SMAC2Scenario,
        solver: Solver,
        instance_set: InstanceSet,
        sparkle_objectives: list[SparkleObjective],
        number_of_runs: int,
        parent_directory: Path,
        solver_calls: int = None,
        max_iterations: int = None,
        cpu_time: int = None,
        wallclock_time: int = None,
        solver_cutoff_time: int = None,
        target_cutoff_length: str = None,
        cli_cores: int = None,
        use_cpu_time_in_tunertime: bool = None,
        feature_data: FeatureDataFrame | Path = None,
        timestamp: str = None,
    ) -> None:
        """Initialize scenario paths and names.

        Args:
            solver: Solver that should be configured.
            instance_set: Instances object for the scenario.
            sparkle_objectives: SparkleObjectives used for each run of the
                configuration. Will be simplified to the first objective.
            number_of_runs: The number of configurator runs to perform
                for configuring the solver.
            parent_directory: Directory in which the scenario should be created.
            solver_calls: The number of times the solver is called for each
                configuration run
            max_iterations: The maximum number of iterations allowed for each
                configuration run. [iteration-limit, numIterations,
                numberOfIterations]
            cpu_time: The time budget allocated for each configuration run. (cpu)
            wallclock_time: The time budget allocated for each configuration run.
                (wallclock)
            solver_cutoff_time: The maximum time allowed for each solver call run
                during configuration.
            target_cutoff_length: A domain specific measure of when the algorithm
                should consider itself done.
            cli_cores: The number of cores to use to execute runs.
                Defaults in SMAC2 to 1.
            use_cpu_time_in_tunertime: Whether to calculate SMAC2's own used time
                for budget deduction. Defaults in SMAC2 to True.
            feature_data: If features are used, this contains the feature data.
                If it is a FeatureDataFrame, will convert values to SMAC2 format.
                If it is a Path, will pass the path to SMAC2.
                Defaults to None.
            timestamp: An optional timestamp for the directory name.
        """
        super().__init__(
            solver,
            instance_set,
            sparkle_objectives,
            number_of_runs,
            parent_directory,
            timestamp,
        )
        self.solver = solver
        self.instance_set = instance_set

        # SMAC2 is configured for a single objective: only the first one is used
        self.sparkle_objective = sparkle_objectives[0]
        self.solver_calls = solver_calls
        self.cpu_time = cpu_time
        self.wallclock_time = wallclock_time
        self.solver_cutoff_time = solver_cutoff_time
        self.cutoff_length = target_cutoff_length
        self.max_iterations = max_iterations
        self.cli_cores = cli_cores
        self.use_cpu_time_in_tunertime = use_cpu_time_in_tunertime

        self.feature_data = feature_data
        self._feature_file_path = None
        # Explicit None check: consistent with the rest of the class and does
        # not depend on the truthiness of a data frame like object.
        if self.feature_data is not None:
            if isinstance(self.feature_data, FeatureDataFrame):
                # Convert feature data to SMAC2 format: one row per instance,
                # columns named Feature1..FeatureN
                data_dict = {
                    str(instance): feature_data.get_instance(str(instance))
                    for instance in self.instance_set.instance_paths
                }
                feature_columns = [
                    f"Feature{index + 1}" for index in range(feature_data.num_features)
                ]
                self.feature_data = pd.DataFrame.from_dict(
                    data_dict,
                    orient="index",
                    columns=feature_columns,
                )
                self.feature_data = self.feature_data.map(self._to_smac2_feature)
            elif isinstance(self.feature_data, Path):  # Read from Path
                self._feature_file_path = feature_data
                self.feature_data = pd.read_csv(self.feature_file_path, index_col=0)
            else:
                print(
                    f"WARNING: Feature data is of type {type(feature_data)}. "
                    "Expected FeatureDataFrame or Path."
                )

    @staticmethod
    def _to_smac2_feature(x: object) -> float:
        """Map missing or non-numeric values to -512 (Pre-defined by SMAC2).

        Args:
            x: A single feature value.

        Returns:
            The value as float, or -512.0 if it is NaN or cannot be converted.
        """
        # Convert first: math.isnan raises on values that are not numbers, so
        # the NaN check can only be done on the converted float.
        try:
            value = float(x)
        except (TypeError, ValueError, OverflowError):
            return -512.0
        return -512.0 if math.isnan(value) else value

    @property
    def instance_file_path(self: SMAC2Scenario) -> Path | None:
        """Return the path of the instance file, or None without a directory."""
        if self.directory:
            return self.directory / f"{self.instance_set.name}.txt"
        return None

    @property
    def outdir_train(self: SMAC2Scenario) -> Path | None:
        """Return the path of the train out directory, or None without a directory."""
        # SMAC2 Specific directory
        if self.directory:
            return self.directory / "outdir_train_configuration"
        return None

    @property
    def feature_file_path(self: SMAC2Scenario) -> Path | None:
        """Return the path of the feature file.

        A feature file path given at construction takes precedence over the
        default location inside the scenario directory.
        """
        if self._feature_file_path:
            return self._feature_file_path
        if self.directory:
            return self.directory / f"{self.instance_set.name}_features.csv"
        return None

    @property
    def configurator(self: SMAC2Scenario) -> type[SMAC2]:
        """Return the type of configurator the scenario belongs to."""
        return SMAC2

    def create_scenario(self: SMAC2Scenario) -> None:
        """Create scenario with solver and instances in the scenario directory.

        This prepares all the necessary subdirectories related to configuration,
        writes the instance file, the feature file (if feature data is set) and
        finally the scenario file.
        """
        super().create_scenario()
        self.outdir_train.mkdir()
        self._prepare_instances()

        if self.feature_data is not None:
            self._create_feature_file()

        self.create_scenario_file()

    def create_scenario_file(
        self: SMAC2Scenario,
        configurator_target: Path = SMAC2.configurator_target,
        pcs_port: PCSConvention = PCSConvention.SMAC,
    ) -> Path:
        """Create a file with the configuration scenario.

        Writes supplementary information to the target algorithm (algo =) as:
        algo = {configurator_target} {solver_directory} {tmp} {sparkle_objective}

        Settings that are None are omitted, except for the cutoff time which is
        always written.

        Args:
            configurator_target: Path to the target algorithm wrapper.
            pcs_port: The PCS convention used to select the solver's PCS file.

        Returns:
            The path of the scenario file that was written.
        """
        with self.scenario_file_path.open("w") as file:
            file.write(
                f"algo = {configurator_target.absolute()} "
                f"{self.solver.directory} {self.tmp} {self.sparkle_objective} \n"
                f"deterministic = {1 if self.solver.deterministic else 0}\n"
                f"run_obj = {self._get_performance_measure()}\n"
                f"cutoffTime = {self.solver_cutoff_time}\n"
                f"paramfile = {self.solver.get_pcs_file(pcs_port)}\n"
                f"outdir = {self.outdir_train}\n"
                f"instance_file = {self.instance_file_path}\n"
                f"test_instance_file = {self.instance_file_path}\n"
            )
            if self.cutoff_length is not None:
                file.write(f"cutoff_length = {self.cutoff_length}\n")
            if self.max_iterations is not None:
                file.write(f"iteration-limit = {self.max_iterations}\n")
            if self.wallclock_time is not None:
                file.write(f"wallclock-limit = {self.wallclock_time}\n")
            if self.cpu_time is not None:
                file.write(f"cputime-limit = {self.cpu_time}\n")
            if self.solver_calls is not None:
                file.write(f"runcount-limit = {self.solver_calls}\n")
            if self.cli_cores is not None:
                # The newline is required: without it the next setting ends up
                # on the same line and both become unreadable.
                file.write(f"cli-cores = {self.cli_cores}\n")
            if self.feature_data is not None:
                file.write(f"feature_file = {self.feature_file_path}\n")
            if self.use_cpu_time_in_tunertime is not None:
                file.write(
                    f"use-cpu-time-in-tunertime = {self.use_cpu_time_in_tunertime}\n"
                )
            # We don't let SMAC do the validation
            file.write("validation = false" + "\n")
        return self.scenario_file_path

    def _prepare_instances(self: SMAC2Scenario) -> None:
        """Create instance list file without instance specifics."""
        self.instance_file_path.parent.mkdir(exist_ok=True, parents=True)
        with self.instance_file_path.open("w+") as file:
            # One instance path per line
            for instance_path in self.instance_set._instance_paths:
                file.write(f"{instance_path}\n")

    def _create_feature_file(self: SMAC2Scenario) -> None:
        """Create CSV file from feature data."""
        self.feature_data.to_csv(self.feature_file_path, index_label="INSTANCE_NAME")

    def _get_performance_measure(self: SMAC2Scenario) -> str:
        """Retrieve the performance measure of the SparkleObjective.

        Returns:
            "RUNTIME" if the objective is time based, "QUALITY" otherwise.
        """
        # Single source of truth for the objective to run_obj translation
        return SMAC2.get_smac_run_obj(self.sparkle_objective)

    def serialise(self: SMAC2Scenario) -> dict:
        """Transform ConfigurationScenario to dictionary format."""
        return {
            "number_of_runs": self.number_of_runs,
            "solver_calls": self.solver_calls,
            "cpu_time": self.cpu_time,
            "wallclock_time": self.wallclock_time,
            "solver_cutoff_time": self.solver_cutoff_time,
            "cutoff_length": self.cutoff_length,
            "max_iterations": self.max_iterations,
            "sparkle_objective": self.sparkle_objective.name,
            "feature_data": str(self.feature_file_path),
            "use_cpu_time_in_tunertime": self.use_cpu_time_in_tunertime,
        }

    @staticmethod
    def from_file(scenario_file: Path) -> SMAC2Scenario:
        """Reads scenario file and initialises SMAC2Scenario.

        Args:
            scenario_file: Path to a scenario file in the "key = value" format
                written by create_scenario_file.

        Returns:
            The SMAC2Scenario described by the file.
        """
        # Parse "key = value" lines; lines without the separator are skipped
        config = {}
        with scenario_file.open() as file:
            for line in file:
                key, separator, value = line.strip().partition(" = ")
                if separator:
                    config[key] = value

        def optional_int(*keys: str) -> int | None:
            """Return the first available setting among keys as int, else None."""
            for key in keys:
                value = config.get(key)
                # Unset values are written to the file as the string "None"
                if value is not None and value != "None":
                    return int(value)
            return None

        # Collect relevant settings, using the keys create_scenario_file writes.
        # The keys this method looked up previously are kept as fallback.
        cpu_time = optional_int("cputime-limit", "cpu_time")
        wallclock_limit = optional_int("wallclock-limit")
        solver_calls = optional_int("runcount-limit")
        max_iterations = optional_int("iteration-limit")
        cli_cores = optional_int("cli-cores")
        solver_cutoff_time = optional_int("cutoffTime")
        # Only present in the file if it was set when the scenario was written
        cutoff_length = config.get("cutoff_length")

        use_cpu_time_in_tunertime = None
        for key in ("use-cpu-time-in-tunertime", "use-cputime-in-tunertime"):
            if key in config:
                # Stored as text; a raw "False" string would be truthy
                use_cpu_time_in_tunertime = config[key].strip().lower() == "true"
                break

        # algo = {configurator_target} {solver_directory} {tmp} {objective}
        _, solver_path, _, objective_str = config["algo"].split(" ")
        objective = resolve_objective(objective_str)
        solver = Solver(Path(solver_path.strip()))
        # Extract the instance set from the instance file: the parent directory
        # of the first listed instance
        instance_file_path = Path(config["instance_file"])
        with instance_file_path.open() as instance_file:
            first_instance = instance_file.readline().strip()
        instance_set = Instance_Set(Path(first_instance).parent)
        # The number of runs is derived from the files in the results folder
        results_folder = scenario_file.parent / "results"
        number_of_runs = sum(1 for p in results_folder.iterdir() if p.is_file())
        feature_data_path = None
        if "feature_file" in config:
            feature_data_path = Path(config["feature_file"])
        # Get the timestamp from the scenario dir name
        timestamp = scenario_file.parent.name.split("_")[-1]
        return SMAC2Scenario(
            solver,
            instance_set,
            [objective],
            number_of_runs,
            instance_file_path.parent.parent,
            solver_calls,
            max_iterations,
            cpu_time,
            wallclock_limit,
            solver_cutoff_time,
            cutoff_length,
            cli_cores,
            use_cpu_time_in_tunertime,
            feature_data_path,
            timestamp,
        )