Coverage for sparkle/configurator/implementations/smac2.py: 80%

174 statements  

« prev     ^ index     » next       coverage.py v7.6.4, created at 2024-11-05 14:48 +0000

1#!/usr/bin/env python3 

2# -*- coding: UTF-8 -*- 

3"""Configurator classes to implement SMAC2 in Sparkle.""" 

4from __future__ import annotations 

5from pathlib import Path 

6import fcntl 

7import glob 

8import shutil 

9 

10import pandas as pd 

11 

12import runrunner as rrr 

13from runrunner import Runner, Run 

14 

15from sparkle.configurator.configurator import Configurator, ConfigurationScenario 

16from sparkle.solver import Solver 

17from sparkle.solver.validator import Validator 

18from sparkle.instance import InstanceSet, Instance_Set 

19from sparkle.types import SparkleObjective 

20 

21 

class SMAC2(Configurator):
    """Class for SMAC2 (Java) configurator."""
    # The SMAC2 distribution is looked up in "Components/", relative to the
    # third parent directory of this file.
    configurator_path = Path(__file__).parent.parent.parent.resolve() /\
        "Components/smac-v2.10.03-master-778"
    configurator_executable = configurator_path / "smac"
    configurator_target = configurator_path / "smac_target_algorithm.py"

    version = "2.10.03"
    full_name = "Sequential Model-based Algorithm Configuration"

    def __init__(self: SMAC2,
                 base_dir: Path,
                 output_path: Path) -> None:
        """Returns the SMAC configurator, Java SMAC V2.10.03.

        A subdirectory named after this class is created inside output_path and
        used as the actual output directory of the configurator.

        Args:
            base_dir: The path where the configurator will be executed in.
            output_path: The path where the output will be placed.
        """
        output_path = output_path / SMAC2.__name__
        output_path.mkdir(parents=True, exist_ok=True)
        super().__init__(
            validator=Validator(out_dir=output_path),
            output_path=output_path,
            base_dir=base_dir,
            tmp_path=output_path / "tmp",
            multi_objective_support=False)

    @property
    def name(self: SMAC2) -> str:
        """Returns the name of the configurator."""
        return SMAC2.__name__

    @property
    def scenario_class(self: SMAC2) -> ConfigurationScenario:
        """Returns the SMAC2 scenario class."""
        return SMAC2Scenario

    def configure(self: SMAC2,
                  scenario: ConfigurationScenario,
                  validate_after: bool = True,
                  sbatch_options: list[str] = None,
                  num_parallel_jobs: int = None,
                  base_dir: Path = None,
                  run_on: Runner = Runner.SLURM) -> list[Run]:
        """Start configuration job.

        Args:
            scenario: ConfigurationScenario object
            validate_after: Whether the Validator will be called after the configuration
            sbatch_options: List of slurm batch options to use. Defaults to no
                extra options.
            num_parallel_jobs: The maximum number of jobs to run parallel.
                When None, all configurator runs may run in parallel.
            base_dir: The path where the sbatch scripts will be created for Slurm.
            run_on: On which platform to run the jobs. Default: Slurm.

        Returns:
            A list of RunRunner Run objects: the configuration run, followed by
            the validation run if validate_after is set.

        Raises:
            RuntimeError: If no Java executable can be found on the PATH.
        """
        if shutil.which("java") is None:
            raise RuntimeError(
                "SMAC2 requires Java 1.8.0_402, but Java is not installed. "
                "Please ensure Java is installed and try again."
            )
        # A fresh list per call; a list literal as default would be shared
        # between all calls of this method.
        if sbatch_options is None:
            sbatch_options = []
        scenario.create_scenario()
        output_csv = scenario.validation / "configurations.csv"
        output_csv.parent.mkdir(exist_ok=True, parents=True)
        # One output file and one command per configurator run; the run index
        # doubles as the seed passed to SMAC2.
        output = [f"{(scenario.results_directory).absolute()}/"
                  f"{scenario.name}_seed_{seed}_smac.txt"
                  for seed in range(scenario.number_of_runs)]
        cmds = [f"python3 {Configurator.configurator_cli_path.absolute()} "
                f"{SMAC2.__name__} {output[seed]} {output_csv.absolute()} "
                f"{SMAC2.configurator_executable.absolute()} "
                f"--scenario-file {scenario.scenario_file_path.absolute()} "
                f"--seed {seed} "
                f"--execdir {scenario.tmp.absolute()}"
                for seed in range(scenario.number_of_runs)]
        parallel_jobs = scenario.number_of_runs
        if num_parallel_jobs is not None:
            # num_parallel_jobs is an upper bound, so it can only lower the
            # number of simultaneous runs, never raise it above the run count.
            parallel_jobs = min(num_parallel_jobs, scenario.number_of_runs)
        runs = [rrr.add_to_queue(
            runner=run_on,
            cmd=cmds,
            name=f"{self.name}: {scenario.solver.name} on {scenario.instance_set.name}",
            base_dir=base_dir,
            path=scenario.results_directory,
            output_path=output,
            parallel_jobs=parallel_jobs,
            sbatch_options=sbatch_options,
            srun_options=["-N1", "-n1"])]

        if validate_after:
            self.validator.out_dir = output_csv.parent
            self.validator.tmp_out_dir = base_dir
            # The validation is queued with the configuration run as dependency
            validate_run = self.validator.validate(
                [scenario.solver] * scenario.number_of_runs,
                output_csv,
                [scenario.instance_set],
                [scenario.sparkle_objective],
                scenario.cutoff_time,
                subdir=Path(),
                dependency=runs,
                sbatch_options=sbatch_options,
                run_on=run_on)
            runs.append(validate_run)

        if run_on == Runner.LOCAL:
            # Local runs are awaited before returning
            for run in runs:
                run.wait()
        return runs

    @staticmethod
    def organise_output(output_source: Path, output_target: Path = None) -> None | str:
        """Retrieves configurations from SMAC files and places them in output.

        Args:
            output_source: SMAC2 output file to extract the configuration from.
            output_target: File to append the configuration to. If None, the
                configuration is returned instead of written.

        Returns:
            The configuration string if output_target is None and a target
            algorithm call was found in output_source, otherwise None.
        """
        call_key = SMAC2.configurator_target.name
        with output_source.open("r") as source_file:
            lines = source_file.readlines()
        # Last line describing a call is the best found configuration
        for line in reversed(lines):
            if call_key not in line:
                continue
            call_str = line.split(call_key, maxsplit=1)[1].strip()
            # The configuration is what remains after the first seven
            # space-separated tokens following the target algorithm name
            configuration = call_str.split(" ", 7)[-1]
            if output_target is None:
                return configuration
            with output_target.open("a") as fout:
                # Exclusive lock while appending; it is held until the file
                # is closed at the end of this with-block.
                fcntl.flock(fout.fileno(), fcntl.LOCK_EX)
                fout.write(configuration + "\n")
            break
        return None

    @staticmethod
    def get_smac_run_obj(objective: SparkleObjective) -> str:
        """Return the SMAC run objective based on the Performance Measure.

        Args:
            objective: The objective to translate.

        Returns:
            "RUNTIME" if the objective's time attribute is truthy, "QUALITY"
            otherwise.
        """
        if objective.time:
            return "RUNTIME"
        return "QUALITY"

    def get_status_from_logs(self: SMAC2) -> None:
        """Method to scan the log files of the configurator for warnings.

        Prints, per scenario directory found under "<output_path>/scenarios",
        which "log-warn*" files are non-empty. Does nothing if that directory
        does not exist.
        """
        base_dir = self.output_path / "scenarios"
        if not base_dir.exists():
            return
        print(f"Checking the log files of configurator {type(self).__name__} for "
              "warnings...")
        scenarios = [f for f in base_dir.iterdir() if f.is_dir()]
        for scenario in scenarios:
            log_dir = scenario / "outdir_train_configuration" \
                / (scenario.name + "_scenario")
            warn_files = glob.glob(str(log_dir) + "/log-warn*")
            # Only warning logs with content count as warnings
            non_empty = [log_file for log_file in warn_files
                         if Path(log_file).stat().st_size > 0]
            if non_empty:
                print(f"Scenario {scenario.name} has {len(non_empty)} warning(s), see "
                      "the following log file(s) for more information:")
                for log_file in non_empty:
                    print(f"\t-{log_file}")
            else:
                print(f"Scenario {scenario.name} has no warnings.")

182 

183 

class SMAC2Scenario(ConfigurationScenario):
    """Class to handle SMAC2 configuration scenarios."""
    def __init__(self: SMAC2Scenario, solver: Solver,
                 instance_set: InstanceSet,
                 sparkle_objectives: list[SparkleObjective],
                 parent_directory: Path,
                 number_of_runs: int = None,
                 solver_calls: int = None,
                 max_iterations: int = None,
                 cpu_time: int = None,
                 wallclock_time: int = None,
                 cutoff_time: int = None,
                 target_cutoff_length: str = None,
                 use_cpu_time_in_tunertime: bool = None,
                 feature_data_df: pd.DataFrame = None)\
            -> None:
        """Initialize scenario paths and names.

        Args:
            solver: Solver that should be configured.
            instance_set: Instances object for the scenario.
            sparkle_objectives: SparkleObjectives used for each run of the configuration.
                Will be simplified to the first objective.
            parent_directory: Directory in which the scenario should be created.
            number_of_runs: The number of configurator runs to perform
                for configuring the solver.
            solver_calls: The number of times the solver is called for each
                configuration run
            max_iterations: The maximum number of iterations allowed for each
                configuration run. [iteration-limit, numIterations, numberOfIterations]
            cpu_time: The time budget allocated for each configuration run. (cpu)
            wallclock_time: The time budget allocated for each configuration run.
                (wallclock)
            cutoff_time: The maximum time allowed for each individual run during
                configuration.
            target_cutoff_length: A domain specific measure of when the algorithm
                should consider itself done.
            use_cpu_time_in_tunertime: Whether to calculate SMAC2's own used time for
                budget deduction. Defaults in SMAC2 to True.
            feature_data_df: If features are used, this contains the feature data.
                Defaults to None.
        """
        super().__init__(solver, instance_set, sparkle_objectives, parent_directory)
        self.solver = solver
        self.instance_set = instance_set
        self.name = f"{self.solver.name}_{self.instance_set.name}"

        if sparkle_objectives is not None:
            if len(sparkle_objectives) > 1:
                print("WARNING: SMAC2 does not have multi objective support. "
                      "Only the first objective will be used.")
            self.sparkle_objective = sparkle_objectives[0]
        else:
            self.sparkle_objective = None

        self.number_of_runs = number_of_runs
        self.solver_calls = solver_calls
        self.cpu_time = cpu_time
        self.wallclock_time = wallclock_time
        self.cutoff_time = cutoff_time
        self.cutoff_length = target_cutoff_length
        self.max_iterations = max_iterations
        self.use_cpu_time_in_tunertime = use_cpu_time_in_tunertime
        self.feature_data = feature_data_df

        # Scenario Paths
        self.instance_file_path = self.directory / f"{self.instance_set.name}.txt"
        self.tmp = self.directory / "tmp"
        self.validation = self.directory / "validation"
        self.results_directory = self.directory / "results"

        # SMAC2 Specific
        self.outdir_train = self.directory / "outdir_train_configuration"

    def create_scenario(self: SMAC2Scenario) -> None:
        """Create scenario with solver and instances in the scenario directory.

        This prepares all the necessary subdirectories related to configuration.
        Any existing scenario directory is removed first. Afterwards the
        instance file, the feature file (only if feature data is present) and
        the scenario file are written.
        """
        # Prepare scenario directory
        shutil.rmtree(self.directory, ignore_errors=True)
        self.directory.mkdir(parents=True)
        # Create empty directories as needed
        self.outdir_train.mkdir()
        self.tmp.mkdir()
        self.results_directory.mkdir(parents=True)  # Prepare results directory

        self._prepare_instances()

        if self.feature_data is not None:
            self._create_feature_file()

        self.create_scenario_file()

    def create_scenario_file(self: SMAC2Scenario) -> Path:
        """Create a file with the configuration scenario.

        Writes supplementary information to the target algorithm (algo =) as:
        algo = {configurator_target} {solver_directory} {sparkle_objective}

        Optional limits and settings are only written when they are not None.

        Returns:
            Path to the written scenario file.
        """
        with self.scenario_file_path.open("w") as file:
            file.write(f"algo = {SMAC2.configurator_target.absolute()} "
                       f"{self.solver.directory.absolute()} {self.sparkle_objective} \n"
                       f"execdir = {self.tmp.absolute()}/\n"
                       f"deterministic = {1 if self.solver.deterministic else 0}\n"
                       f"run_obj = {self._get_performance_measure()}\n"
                       f"cutoffTime = {self.cutoff_time}\n"
                       f"cutoff_length = {self.cutoff_length}\n"
                       f"paramfile = {self.solver.get_pcs_file()}\n"
                       f"outdir = {self.outdir_train.absolute()}\n"
                       f"instance_file = {self.instance_file_path.absolute()}\n"
                       f"test_instance_file = {self.instance_file_path.absolute()}\n")
            if self.max_iterations is not None:
                file.write(f"iteration-limit = {self.max_iterations}\n")
            if self.wallclock_time is not None:
                file.write(f"wallclock-limit = {self.wallclock_time}\n")
            if self.cpu_time is not None:
                file.write(f"cputime-limit = {self.cpu_time}\n")
            if self.solver_calls is not None:
                file.write(f"runcount-limit = {self.solver_calls}\n")
            if self.feature_data is not None:
                # NOTE: feature_file_path is set by _create_feature_file
                file.write(f"feature_file = {self.feature_file_path}\n")
            if self.use_cpu_time_in_tunertime is not None:
                file.write("use-cpu-time-in-tunertime = "
                           f"{self.use_cpu_time_in_tunertime}\n")
            # We don't let SMAC do the validation
            file.write("validation = false" + "\n")
        return self.scenario_file_path

    def _prepare_instances(self: SMAC2Scenario) -> None:
        """Create instance list file without instance specifics."""
        self.instance_file_path.parent.mkdir(exist_ok=True, parents=True)
        with self.instance_file_path.open("w+") as file:
            # One absolute instance path per line
            for instance_path in self.instance_set._instance_paths:
                file.write(f"{instance_path.absolute()}\n")

    def _get_performance_measure(self: SMAC2Scenario) -> str:
        """Retrieve the performance measure of the SparkleObjective.

        Returns:
            "RUNTIME" if the scenario's objective is a time objective,
            "QUALITY" otherwise.
        """
        return SMAC2.get_smac_run_obj(self.sparkle_objective)

    def _create_feature_file(self: SMAC2Scenario) -> None:
        """Create CSV file from feature data.

        Stores the location of the written file in self.feature_file_path.
        """
        self.feature_file_path = Path(self.directory
                                      / f"{self.instance_set.name}_features.csv")
        # feature_file_path already contains the scenario directory; joining
        # it onto the directory again would duplicate a relative directory.
        self.feature_data.to_csv(self.feature_file_path,
                                 index_label="INSTANCE_NAME")

    def _clean_up_scenario_dirs(self: SMAC2Scenario,
                                configurator_path: Path,) -> list[Path]:
        """Collect directories to clean up after configuration scenario is done.

        Args:
            configurator_path: Directory containing the "scenarios" directory.

        Returns:
            list[Path]: One directory per configurator run, named after the
                run index, inside this scenario's directory under
                configurator_path / "scenarios".
        """
        configurator_solver_path = configurator_path / "scenarios"\
            / f"{self.solver.name}_{self.instance_set.name}"
        return [configurator_solver_path / str(index)
                for index in range(self.number_of_runs)]

    def serialize_scenario(self: SMAC2Scenario) -> dict:
        """Transform ConfigurationScenario to dictionary format."""
        return {
            "number_of_runs": self.number_of_runs,
            "solver_calls": self.solver_calls,
            "cpu_time": self.cpu_time,
            "wallclock_time": self.wallclock_time,
            "cutoff_time": self.cutoff_time,
            "cutoff_length": self.cutoff_length,
            "max_iterations": self.max_iterations,
            "sparkle_objective": self.sparkle_objective.name,
            "feature_data": self.feature_data,
            "use_cpu_time_in_tunertime": self.use_cpu_time_in_tunertime
        }

    @staticmethod
    def from_file(scenario_file: Path) -> SMAC2Scenario:
        """Reads scenario file and initalises SMAC2Scenario.

        The number of runs is derived from the number of files in the
        "results" directory next to the scenario file.

        Args:
            scenario_file: Path to a scenario file in "key = value" format.

        Returns:
            The SMAC2Scenario described by the file.
        """
        config = {}
        with scenario_file.open() as file:
            for line in file:
                key, separator, value = line.strip().partition(" = ")
                if separator:  # Skip blank lines and lines without "key = value"
                    config[key] = value

        def optional_int(*keys: str) -> int | None:
            """Return the integer value of the first key present in config."""
            for key in keys:
                if key in config:
                    return int(config[key])
            return None

        # Collect relevant settings. The keys must match those written by
        # create_scenario_file; the previously read (never written) spellings
        # are still accepted as fallback.
        cpu_time = optional_int("cputime-limit", "cpu_time")
        wallclock_limit = optional_int("wallclock-limit")
        solver_calls = optional_int("runcount-limit")
        max_iterations = optional_int("iteration-limit")
        use_cpu_time_in_tunertime = None
        for key in ("use-cpu-time-in-tunertime", "use-cputime-in-tunertime"):
            if key in config:
                # Written as str(bool) by create_scenario_file
                use_cpu_time_in_tunertime = config[key].strip().lower() == "true"
                break

        _, solver_path, objective_str = config["algo"].split(" ")
        objective = SparkleObjective(objective_str)
        solver = Solver(Path(solver_path.strip()))
        # Extract the instance set from the instance file: the directory of
        # the first listed instance is taken as the instance set directory
        instance_file_path = Path(config["instance_file"])
        with instance_file_path.open() as file:
            first_instance = file.readline().strip()
        instance_set = Instance_Set(Path(first_instance).parent)
        results_folder = scenario_file.parent / "results"
        result_files = [p for p in results_folder.iterdir() if p.is_file()]
        number_of_runs = len(result_files)
        return SMAC2Scenario(solver,
                             instance_set,
                             [objective],
                             instance_file_path.parent.parent,
                             number_of_runs,
                             solver_calls,
                             max_iterations,
                             cpu_time,
                             wallclock_limit,
                             int(config["cutoffTime"]),
                             config["cutoff_length"],
                             use_cpu_time_in_tunertime)