Coverage for sparkle/configurator/implementations/smac2.py: 73% (203 statements)

coverage.py v7.6.1, created at 2024-09-27 09:10 +0000

#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
"""Configurator class to use different configurators like SMAC."""

from __future__ import annotations
from typing import Callable
from pathlib import Path
import ast
from statistics import mean
import operator
import fcntl
import glob
import shutil

import pandas as pd

import runrunner as rrr
from runrunner import Runner, Run

from sparkle.configurator.configurator import Configurator, ConfigurationScenario
from sparkle.solver import Solver
from sparkle.solver.validator import Validator
from sparkle.instance import InstanceSet
from sparkle.types import SparkleObjective


class SMAC2(Configurator):
    """Class for SMAC2 (Java) configurator."""
    configurator_path = Path(__file__).parent.parent.parent.resolve() /\
        "Components/smac-v2.10.03-master-778"
    target_algorithm = "smac_target_algorithm.py"

    def __init__(self: SMAC2,
                 objectives: list[SparkleObjective],
                 base_dir: Path,
                 output_path: Path) -> None:
        """Initialise the SMAC2 configurator, Java SMAC V2.10.03.

        Args:
            objectives: The objectives to optimize. Only supports one objective.
            base_dir: The directory in which the configurator will be executed.
            output_path: The path where the output will be placed.
        """
        output_path = output_path / SMAC2.__name__
        output_path.mkdir(parents=True, exist_ok=True)
        super().__init__(
            validator=Validator(out_dir=output_path),
            output_path=output_path,
            executable_path=SMAC2.configurator_path / "smac",
            configurator_target=SMAC2.configurator_path / SMAC2.target_algorithm,
            objectives=objectives,
            base_dir=base_dir,
            tmp_path=output_path / "tmp",
            multi_objective_support=False)

    @property
    def scenario_class(self: Configurator) -> ConfigurationScenario:
        """Returns the SMAC2 scenario class."""
        return SMAC2Scenario

    def configure(self: Configurator,
                  scenario: ConfigurationScenario,
                  validate_after: bool = True,
                  sbatch_options: list[str] = [],
                  num_parallel_jobs: int = None,
                  base_dir: Path = None,
                  run_on: Runner = Runner.SLURM) -> list[Run]:
        """Start configuration job.

        Args:
            scenario: ConfigurationScenario object
            validate_after: Whether the Validator will be called after the configuration
            sbatch_options: List of Slurm batch options to use
            num_parallel_jobs: The maximum number of jobs to run in parallel.
            base_dir: The path where the sbatch scripts will be created for Slurm.
            run_on: On which platform to run the jobs. Default: Slurm.

        Returns:
            A list of RunRunner Run objects: the configuration run, followed by the
            validation run if validate_after is True.
        """
        self.scenario = scenario
        self.scenario.create_scenario(parent_directory=self.output_path)
        output_csv = self.scenario.validation / "configurations.csv"
        output_csv.parent.mkdir(exist_ok=True, parents=True)
        output = [f"{(self.scenario.result_directory).absolute()}/"
                  f"{self.scenario.name}_seed_{seed}_smac.txt"
                  for seed in range(self.scenario.number_of_runs)]
        cmds = [f"python3 {Configurator.configurator_cli_path.absolute()} "
                f"{SMAC2.__name__} {output[seed]} {output_csv.absolute()} "
                f"{self.executable_path.absolute()} "
                f"--scenario-file {(self.scenario.scenario_file_path).absolute()} "
                f"--seed {seed} "
                f"--execdir {self.scenario.tmp.absolute()}"
                for seed in range(self.scenario.number_of_runs)]
        parallel_jobs = self.scenario.number_of_runs
        if num_parallel_jobs is not None:
            parallel_jobs = max(num_parallel_jobs,
                                self.scenario.number_of_runs)
        configuration_run = rrr.add_to_queue(
            runner=run_on,
            cmd=cmds,
            name="configure_solver",
            base_dir=base_dir,
            output_path=output,
            parallel_jobs=parallel_jobs,
            sbatch_options=sbatch_options,
            srun_options=["-N1", "-n1"])
        runs = [configuration_run]

        if validate_after:
            self.validator.out_dir = output_csv.parent
            self.validator.tmp_out_dir = base_dir
            validate_run = self.validator.validate(
                [scenario.solver] * self.scenario.number_of_runs,
                output_csv.absolute(),
                [scenario.instance_set],
                [self.scenario.sparkle_objective],
                scenario.cutoff_time,
                subdir=Path(),
                dependency=configuration_run,
                sbatch_options=sbatch_options,
                run_on=run_on)
            runs.append(validate_run)

        if run_on == Runner.LOCAL:
            for run in runs:
                run.wait()
        return runs
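    # A sketch of one generated command per seed (placeholder paths, purely
    # illustrative; the real values come from the scenario and platform settings):
    #   python3 <configurator_cli_path> SMAC2 <result_dir>/<name>_seed_0_smac.txt
    #       <validation_dir>/configurations.csv <smac_executable>
    #       --scenario-file <scenario_file> --seed 0 --execdir <scenario_tmp_dir>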

    def get_optimal_configuration(
            self: Configurator,
            solver: Solver,
            instance_set: InstanceSet,
            objective: SparkleObjective = None,
            aggregate_config: Callable = mean) -> tuple[float, str]:
        """Returns optimal value and configuration string of solver on instance set."""
        if self.scenario is None:
            self.set_scenario_dirs(solver, instance_set)
        results = self.validator.get_validation_results(
            solver,
            instance_set,
            source_dir=self.scenario.validation,
            subdir=self.scenario.validation.relative_to(self.validator.out_dir))
        # Group the results per configuration
        if objective is None:
            objective = self.objectives[0]
        value_column = results[0].index(objective.name)
        config_column = results[0].index("Configuration")
        configurations = list(set(row[config_column] for row in results[1:]))
        config_scores = []
        for config in configurations:
            values = [float(row[value_column])
                      for row in results[1:] if row[config_column] == config]
            config_scores.append(aggregate_config(values))

        comparison = operator.lt if objective.minimise else operator.gt

        # Find the optimal value
        min_index = 0
        current_optimal = config_scores[min_index]
        for i, score in enumerate(config_scores):
            if comparison(score, current_optimal):
                min_index, current_optimal = i, score

        # Return the optimal configuration dictionary as commandline args
        config_str = configurations[min_index].strip(" ")
        if config_str.startswith("{"):
            config = ast.literal_eval(config_str)
            config_str = " ".join([f"-{key} '{config[key]}'" for key in config])
        return current_optimal, config_str
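    # For illustration (hypothetical parameter names): a validation entry storing
    # "{'init_solution': '1', 'perform_first_div': '0'}" would be returned as the
    # command line string "-init_solution '1' -perform_first_div '0'".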

    @staticmethod
    def organise_output(output_source: Path, output_target: Path = None) -> None | str:
        """Retrieves configurations from SMAC files and places them in output."""
        call_key = SMAC2.target_algorithm
        # Last line describing a call is the best found configuration
        for line in reversed(output_source.open("r").readlines()):
            if call_key in line:
                call_str = line.split(call_key, maxsplit=1)[1].strip()
                # The configuration is everything after the first seven
                # space-separated arguments of the call
                configuration = call_str.split(" ", 7)[-1]
                if output_target is None:
                    return configuration
                with output_target.open("a") as fout:
                    fcntl.flock(fout.fileno(), fcntl.LOCK_EX)
                    fout.write(configuration + "\n")
                break
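    # Illustrative parse (argument names are placeholders, not the actual SMAC
    # call format): a line ending in
    #   "... smac_target_algorithm.py a1 a2 a3 a4 a5 a6 a7 -param1 'v1' -param2 'v2'"
    # yields the configuration string "-param1 'v1' -param2 'v2'".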

    def set_scenario_dirs(self: Configurator,
                          solver: Solver, instance_set: InstanceSet) -> None:
        """Patching method to allow the rebuilding of the configuration scenario."""
        self.scenario = self.scenario_class(solver, instance_set)
        self.scenario._set_paths(self.output_path)

    @staticmethod
    def get_smac_run_obj(objective: SparkleObjective) -> str:
        """Return the SMAC run objective for a given SparkleObjective.

        Returns:
            "RUNTIME" if the objective is time-based, "QUALITY" otherwise.
        """
        if objective.time:
            return "RUNTIME"
        return "QUALITY"

    def get_status_from_logs(self: SMAC2) -> None:
        """Method to scan the log files of the configurator for warnings."""
        base_dir = self.output_path / "scenarios"
        if not base_dir.exists():
            return
        print(f"Checking the log files of configurator {type(self).__name__} for "
              "warnings...")
        scenarios = [f for f in base_dir.iterdir() if f.is_dir()]
        for scenario in scenarios:
            log_dir = scenario / "outdir_train_configuration" \
                / (scenario.name + "_scenario")
            warn_files = glob.glob(str(log_dir) + "/log-warn*")
            non_empty = [log_file for log_file in warn_files
                         if Path(log_file).stat().st_size > 0]
            if len(non_empty) > 0:
                print(f"Scenario {scenario.name} has {len(non_empty)} warning(s), see "
                      "the following log file(s) for more information:")
                for log_file in non_empty:
                    print(f"\t-{log_file}")
            else:
                print(f"Scenario {scenario.name} has no warnings.")


class SMAC2Scenario(ConfigurationScenario):
    """Class to handle SMAC2 configuration scenarios."""
    def __init__(self: ConfigurationScenario, solver: Solver,
                 instance_set: InstanceSet, number_of_runs: int = None,
                 solver_calls: int = None, cpu_time: int = None,
                 wallclock_time: int = None, cutoff_time: int = None,
                 cutoff_length: int = None,
                 sparkle_objectives: list[SparkleObjective] = None,
                 use_features: bool = None, configurator_target: Path = None,
                 feature_data_df: pd.DataFrame = None)\
            -> None:
        """Initialize scenario paths and names.

        Args:
            solver: Solver that should be configured.
            instance_set: Instances object for the scenario.
            number_of_runs: The number of configurator runs to perform
                for configuring the solver.
            solver_calls: The number of times the solver is called for each
                configuration run.
            cpu_time: The time budget (CPU time) allocated for each configuration run.
            wallclock_time: The time budget (wallclock time) allocated for each
                configuration run.
            cutoff_time: The maximum time allowed for each individual run during
                configuration.
            cutoff_length: The maximum number of iterations allowed for each
                individual run during configuration.
            sparkle_objectives: SparkleObjectives used for each run of the configuration.
                Will be simplified to the first objective.
            use_features: Boolean indicating if features should be used.
            configurator_target: The target Python script to be called.
                This script standardises Configurator I/O for solver wrappers.
            feature_data_df: If features are used, this contains the feature data.
                Defaults to None.
        """
        super().__init__(solver, instance_set, sparkle_objectives)
        self.solver = solver
        self.instance_set = instance_set
        self.name = f"{self.solver.name}_{self.instance_set.name}"
        self.sparkle_objective = sparkle_objectives[0] if sparkle_objectives else None

        self.number_of_runs = number_of_runs
        self.solver_calls = solver_calls
        self.cpu_time = cpu_time
        self.wallclock_time = wallclock_time
        self.cutoff_time = cutoff_time
        self.cutoff_length = cutoff_length
        self.use_features = use_features
        self.configurator_target = configurator_target
        self.feature_data = feature_data_df

        self.parent_directory = Path()
        self.directory = Path()
        self.result_directory = Path()
        self.scenario_file_path = Path()
        self.feature_file_path = Path()
        self.instance_file_path = Path()

    def create_scenario(self: ConfigurationScenario, parent_directory: Path) -> None:
        """Create scenario with solver and instances in the parent directory.

        This prepares all the necessary subdirectories related to configuration.

        Args:
            parent_directory: Directory in which the scenario should be created.
        """
        self._set_paths(parent_directory)
        self._prepare_scenario_directory()
        self._prepare_result_directory()
        self._prepare_instances()

        if self.use_features:
            self._create_feature_file()

        self._create_scenario_file()
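    # Resulting layout (sketch) under <parent_directory>/scenarios/<solver>_<instances>/:
    #   <solver>_<instances>_scenario.txt    the SMAC2 scenario file
    #   <instances>.txt                      list of training instance paths
    #   results/                             per-seed configurator output
    #   outdir_train_configuration/          SMAC2's own output directory
    #   tmp/                                 execution directory for solver calls
    #   <instances>_features.csv             only written when use_features is set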

    def _set_paths(self: ConfigurationScenario, parent_directory: Path) -> None:
        """Set the paths for the scenario based on the specified parent directory."""
        self.parent_directory = parent_directory
        self.directory = self.parent_directory / "scenarios" / self.name
        self.result_directory = self.directory / "results"
        self.instance_file_path = self.directory / f"{self.instance_set.name}.txt"
        self.outdir_train = self.directory / "outdir_train_configuration"
        self.tmp = self.directory / "tmp"
        self.validation = self.directory / "validation"

    def _prepare_scenario_directory(self: ConfigurationScenario) -> None:
        """Delete old scenario dir, recreate it, create empty dirs inside."""
        shutil.rmtree(self.directory, ignore_errors=True)
        self.directory.mkdir(parents=True)

        # Create empty directories as needed
        self.outdir_train.mkdir()
        self.tmp.mkdir()

    def _prepare_result_directory(self: ConfigurationScenario) -> None:
        """Delete possible files in result directory."""
        shutil.rmtree(self.result_directory, ignore_errors=True)
        self.result_directory.mkdir(parents=True)

    def _create_scenario_file(self: ConfigurationScenario) -> None:
        """Create a file with the configuration scenario.

        Writes supplementary information to the target algorithm (algo =) as:
        algo = {configurator_target} {solver_directory} {sparkle_objective}
        """
        self.scenario_file_path = self.directory / f"{self.name}_scenario.txt"
        with self.scenario_file_path.open("w") as file:
            file.write(f"algo = {self.configurator_target.absolute()} "
                       f"{self.solver.directory.absolute()} {self.sparkle_objective} \n"
                       f"execdir = {self.tmp.absolute()}/\n"
                       f"deterministic = {1 if self.solver.deterministic else 0}\n"
                       f"run_obj = {self._get_performance_measure()}\n"
                       f"cutoffTime = {self.cutoff_time}\n"
                       f"cutoff_length = {self.cutoff_length}\n"
                       f"paramfile = {self.solver.get_pcs_file()}\n"
                       f"outdir = {self.outdir_train.absolute()}\n"
                       f"instance_file = {self.instance_file_path.absolute()}\n"
                       f"test_instance_file = {self.instance_file_path.absolute()}\n")
            if self.use_features:
                file.write(f"feature_file = {self.feature_file_path}\n")
            if self.wallclock_time is not None:
                file.write(f"wallclock-limit = {self.wallclock_time}\n")
            if self.cpu_time is not None:
                file.write(f"cputime-limit = {self.cpu_time}\n")
            if self.solver_calls is not None:
                file.write(f"runcount-limit = {self.solver_calls}\n")
            # We don't let SMAC do the validation
            file.write("validation = false\n")
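    # A generated scenario file might look roughly like this (illustrative values,
    # hypothetical paths and objective name):
    #   algo = /install/smac_target_algorithm.py /path/to/Solvers/MySolver PAR10
    #   execdir = /path/to/Output/SMAC2/scenarios/MySolver_Train/tmp/
    #   deterministic = 0
    #   run_obj = RUNTIME
    #   cutoffTime = 60
    #   cutoff_length = None
    #   paramfile = /path/to/Solvers/MySolver/parameters.pcs
    #   outdir = /path/to/Output/SMAC2/scenarios/MySolver_Train/outdir_train_configuration
    #   instance_file = /path/to/Output/SMAC2/scenarios/MySolver_Train/Train.txt
    #   test_instance_file = /path/to/Output/SMAC2/scenarios/MySolver_Train/Train.txt
    #   wallclock-limit = 600
    #   validation = false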

    def _prepare_instances(self: ConfigurationScenario) -> None:
        """Create instance list file without instance specifics."""
        self.instance_file_path.parent.mkdir(exist_ok=True, parents=True)
        with self.instance_file_path.open("w+") as file:
            for instance_path in self.instance_set._instance_paths:
                file.write(f"{instance_path.absolute()}\n")

    def _get_performance_measure(self: ConfigurationScenario) -> str:
        """Retrieve the performance measure of the SparkleObjective.

        Returns:
            Performance measure of the sparkle objective
        """
        if self.sparkle_objective.time:
            return "RUNTIME"
        return "QUALITY"

    def _create_feature_file(self: ConfigurationScenario) -> None:
        """Create CSV file from feature data."""
        self.feature_file_path = Path(self.directory
                                      / f"{self.instance_set.name}_features.csv")
        self.feature_data.to_csv(self.feature_file_path,
                                 index_label="INSTANCE_NAME")

    def _clean_up_scenario_dirs(self: ConfigurationScenario,
                                configurator_path: Path) -> list[Path]:
        """Return directories to clean up after the configuration scenario is done.

        Returns:
            list[Path]: Full paths to directories that can be removed
        """
        result = []
        configurator_solver_path = configurator_path / "scenarios"\
            / f"{self.solver.name}_{self.instance_set.name}"

        for index in range(self.number_of_runs):
            dir = configurator_solver_path / str(index)
            result.append(dir)
        return result

    @staticmethod
    def from_file(scenario_file: Path, solver: Solver, instance_set: InstanceSet,
                  ) -> ConfigurationScenario:
        """Reads scenario file and initialises ConfigurationScenario."""
        config = {}
        with scenario_file.open() as file:
            for line in file:
                key, value = line.strip().split(" = ")
                config[key] = value

        # Collect relevant settings

        cpu_time = int(config["cputime-limit"]) if "cputime-limit" in config \
            else None
        wallclock_limit = int(config["wallclock-limit"]) if "wallclock-limit" in config \
            else None
        solver_calls = int(config["runcount-limit"]) if "runcount-limit" in config \
            else None
        use_features = bool(config["feature_file"]) if "feature_file" in config \
            else None

        objective_str = config["algo"].split(" ")[-1]
        objective = SparkleObjective(objective_str)
        results_folder = scenario_file.parent / "results"
        state_run_dirs = [p for p in results_folder.iterdir() if p.is_file()]
        number_of_runs = len(state_run_dirs)
        return SMAC2Scenario(solver,
                             instance_set,
                             number_of_runs,
                             solver_calls,
                             cpu_time,
                             wallclock_limit,
                             int(config["cutoffTime"]),
                             config["cutoff_length"],
                             [objective],
                             use_features)
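
# Minimal usage sketch (hypothetical objective name and paths; solver and
# instance_set are assumed to be constructed elsewhere):
#
#     objective = SparkleObjective("PAR10")
#     configurator = SMAC2([objective], base_dir=Path("Tmp"),
#                          output_path=Path("Output"))
#     scenario = SMAC2Scenario(solver, instance_set, number_of_runs=5,
#                              wallclock_time=600, cutoff_time=60,
#                              sparkle_objectives=[objective],
#                              configurator_target=SMAC2.configurator_path
#                              / SMAC2.target_algorithm)
#     runs = configurator.configure(scenario, run_on=Runner.SLURM)
#
# An existing scenario can be rebuilt from its scenario file with
# SMAC2Scenario.from_file(scenario_file, solver, instance_set).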