Coverage for sparkle/configurator/implementations/irace.py: 83%

164 statements  

« prev     ^ index     » next       coverage.py v7.6.4, created at 2024-11-05 14:48 +0000

1"""Configurator classes to implement IRACE in Sparkle.""" 

2from __future__ import annotations 

3import shutil 

4import subprocess 

5from pathlib import Path 

6 

7from sparkle.configurator.configurator import Configurator, ConfigurationScenario 

8from sparkle.solver import Solver, Validator 

9from sparkle.instance import InstanceSet, Instance_Set 

10from sparkle.types import SparkleObjective, resolve_objective 

11 

12import runrunner as rrr 

13from runrunner import Runner, Run 

14 

15 

class IRACE(Configurator):
    """Class for IRACE configurator."""

    # Locations of the bundled IRACE component, relative to the package root.
    configurator_path = Path(__file__).parent.parent.parent.resolve() /\
        "Components/irace-v3.5"
    configurator_package = configurator_path / "irace_3.5.tar.gz"
    r6_dependency_package = configurator_path / "R6_2.5.1.tar.gz"
    configurator_executable = configurator_path / "irace" / "bin" / "irace"
    configurator_ablation_executable = configurator_path / "irace" / "bin" / "ablation"
    configurator_target = configurator_path / "irace_target_algorithm.py"

    version = "3.5"
    full_name = "Iterated Racing for Automatic Algorithm Configuration"

    def __init__(self: IRACE,
                 output_path: Path,
                 base_dir: Path,
                 ) -> None:
        """Initialize IRACE configurator.

        Args:
            output_path: Directory in which a sub directory named after this class
                is created (if needed) to hold the configurator output.
            base_dir: Forwarded unchanged to the Configurator base class.
        """
        output_path = output_path / IRACE.__name__
        output_path.mkdir(parents=True, exist_ok=True)
        validator = Validator(out_dir=output_path)
        super().__init__(validator=validator,
                         output_path=output_path,
                         base_dir=base_dir,
                         tmp_path=output_path / "tmp",
                         multi_objective_support=False)

    @property
    def name(self: IRACE) -> str:
        """Returns the name of the configurator."""
        return IRACE.__name__

    @property
    def scenario_class(self: IRACE) -> type[ConfigurationScenario]:
        """Returns the IRACE scenario class (the class itself, not an instance)."""
        return IRACEScenario

    def configure(self: IRACE,
                  scenario: ConfigurationScenario,
                  validate_after: bool = True,
                  sbatch_options: list[str] | None = None,
                  num_parallel_jobs: int = None,
                  base_dir: Path = None,
                  run_on: Runner = Runner.SLURM) -> list[Run]:
        """Start configuration job.

        Args:
            scenario: ConfigurationScenario to execute.
            validate_after: Whether to validate the configuration on the training set
                afterwards or not.
            sbatch_options: List of slurm batch options to use. Defaults to no
                extra options.
            num_parallel_jobs: The maximum number of jobs to run in parallel
            base_dir: The base_dir of RunRunner where the sbatch scripts will be placed
            run_on: On which platform to run the jobs. Default: Slurm.

        Returns:
            A list of RunRunner Run objects: the configuration run, followed by the
            validation run when validate_after is True.
        """
        # A fresh list per call instead of a shared mutable default argument.
        if sbatch_options is None:
            sbatch_options = []

        scenario.create_scenario()
        output_csv = scenario.validation / "configurations.csv"
        output_csv.parent.mkdir(exist_ok=True, parents=True)

        # Create command to call IRACE. Create plural based on number of runs var
        results_dir = scenario.results_directory.absolute()
        cmds = []
        for job_idx in range(scenario.number_of_runs):
            # The same .Rdata file is handed to the CLI wrapper and to IRACE
            # itself (as its log file); the job index doubles as the seed.
            output_file = results_dir / f"output_{job_idx}.Rdata"
            cmds.append(
                f"python3 {Configurator.configurator_cli_path.absolute()} "
                f"{IRACE.__name__} {output_file} {output_csv.absolute()} "
                f"{IRACE.configurator_executable.absolute()} "
                f"--scenario {scenario.scenario_file_path.absolute()} "
                f"--log-file {output_file} "
                f"--seed {job_idx}")

        # Only forward the parallel job limit when the caller set one, so the
        # RunRunner default stays in effect otherwise.
        queue_kwargs = {}
        if num_parallel_jobs is not None:
            queue_kwargs["parallel_jobs"] = num_parallel_jobs
        runs = [rrr.add_to_queue(
            runner=run_on,
            cmd=cmds,
            base_dir=base_dir,
            name=f"IRACE: {scenario.solver.name} on {scenario.instance_set.name}",
            sbatch_options=sbatch_options,
            **queue_kwargs,
        )]

        if validate_after:
            self.validator.out_dir = output_csv.parent
            self.validator.tmp_out_dir = base_dir
            validate_run = self.validator.validate(
                [scenario.solver] * scenario.number_of_runs,
                output_csv,
                [scenario.instance_set],
                [scenario.sparkle_objective],
                scenario.cutoff_time,
                subdir=Path(),
                dependency=runs,  # Validation waits for the configuration runs
                sbatch_options=sbatch_options,
                run_on=run_on)
            runs.append(validate_run)
        return runs

    @staticmethod
    def organise_output(output_source: Path, output_target: Path) -> None | str:
        """Method to restructure and clean up after a single configurator call.

        Extracts the last iteration elite from an IRACE .Rdata file through
        Rscript and appends it to output_target as one line of
        "--parameter value " pairs.

        Args:
            output_source: Path to the .Rdata file written by IRACE.
            output_target: File to which the configuration line is appended.

        Raises:
            RuntimeError: If Rscript exits with a non-zero code or prints nothing.
        """
        # fcntl is only available on POSIX platforms.
        # NOTE(review): presumably imported locally for that reason - confirm.
        import fcntl
        get_config = subprocess.run(
            ["Rscript", "-e",
             'library("irace"); '
             f'load("{output_source}"); '
             "last <- length(iraceResults$iterationElites); "
             "id <- iraceResults$iterationElites[last]; "
             "print(getConfigurationById(iraceResults, ids = id))"],
            capture_output=True)
        r_table = get_config.stdout.decode()
        if get_config.returncode != 0 or r_table.strip() == "":
            raise RuntimeError("Failed to get configuration from IRACE file "
                               f"{output_source}:\n"
                               f"{r_table}\n"
                               f"{get_config.stderr.decode()}")

        # The printed table alternates header and value lines. Join the table
        # header and content together.
        header_parts, content_parts = [], []
        for i, line in enumerate(r_table.splitlines()):
            if i % 2 == 0:  # Even lines are headers
                header_parts.append(line)
            else:  # Odd lines are parameter values
                # First element is the ID
                content_parts.append(" ".join(line.split(" ")[1:]))
        # First header item is the ID, and so is the first remaining value
        header = [x for x in "".join(header_parts).split(" ") if x != ""][1:]
        content = [x for x in "".join(content_parts).split(" ") if x != ""][1:]

        # Skip the bookkeeping column and parameters without a value.
        configuration = "".join(
            f"--{parameter} {value} "
            for parameter, value in zip(header, content)
            if parameter != ".PARENT." and value not in ("NA", "<NA>"))

        with output_target.open("a") as fout:
            # Exclusive lock for the append; it is released when the file closes.
            fcntl.flock(fout.fileno(), fcntl.LOCK_EX)
            fout.write(configuration + "\n")

    def get_status_from_logs(self: IRACE) -> None:
        """Method to scan the log files of the configurator for warnings.

        Raises:
            NotImplementedError: Always, this is not implemented for IRACE.
        """
        raise NotImplementedError

155 

156 

class IRACEScenario(ConfigurationScenario):
    """Class for IRACE scenario."""

    def __init__(self: IRACEScenario,
                 solver: Solver,
                 instance_set: InstanceSet,
                 sparkle_objectives: list[SparkleObjective],
                 parent_directory: Path,
                 number_of_runs: int = None, solver_calls: int = None,
                 cutoff_time: int = None,
                 max_time: int = None,
                 budget_estimation: float = None,
                 first_test: int = None,
                 mu: int = None,
                 max_iterations: int = None,
                 )\
            -> None:
        """Initialize scenario paths and names.

        Args:
            solver: Solver that should be configured.
            instance_set: Instances object for the scenario.
            sparkle_objectives: SparkleObjectives used for each run of the configuration.
                Will be simplified to the first objective.
            parent_directory: Path where the scenario files will be placed.
            number_of_runs: The number of configurator runs to perform
                for configuring the solver.
            solver_calls: The number of times the solver is called for each
                configuration run. [MaxExperiments]
            cutoff_time: The maximum time allowed for each individual run during
                configuration.
            max_time: The time budget (CPU) allocated for the sum of solver calls
                done by the configurator in seconds. [MaxTime]
            budget_estimation: Fraction (smaller than 1) of the budget used to estimate
                the mean computation time of a configuration. Only used when maxTime > 0.
                Default: Computed as cutoff_time / max_time. [BudgetEstimation]
            first_test: Specifies how many instances are evaluated before the first
                elimination test. IRACE Default: 5. [firstTest]
            mu: Parameter used to define the number of configurations sampled and
                evaluated at each iteration. IRACE Default: 5. [mu]
            max_iterations: Maximum number of iterations to be executed. Each iteration
                involves the generation of new configurations and the use of racing to
                select the best configurations. By default (with 0), irace calculates a
                minimum number of iterations as N^iter = ⌊2 + log2 N param⌋, where
                N^param is the number of non-fixed parameters to be tuned.
                Setting this parameter may make irace stop sooner than it should without
                using all the available budget. We recommend to use the default value.
        """
        # Other possible arguments that are not added yet to Sparkle:
        # --test-num-elites Number of elite configurations returned by irace that
        #     will be tested if test instances are provided.
        #     Default: 1.
        # --test-iteration-elites Enable/disable testing the elite configurations
        #     found at each iteration. Default: 0.
        # --test-type Statistical test used for elimination. The default
        #     value selects t-test if capping is enabled or F-test,
        #     otherwise. Valid values are: F-test (Friedman test),
        #     t-test (pairwise t-tests with no correction),
        #     t-test-bonferroni (t-test with Bonferroni's correction
        #     for multiple comparisons), t-test-holm (t-test with
        #     Holm's correction for multiple comparisons).
        # --each-test Number of instances evaluated between elimination
        #     tests. Default: 1.
        # --load-balancing Enable/disable load-balancing when executing
        #     experiments in parallel. Load-balancing makes better
        #     use of computing resources, but increases
        #     communication overhead. If this overhead is large,
        #     disabling load-balancing may be faster. Default: 1.
        # --mpi Enable/disable MPI. Use Rmpi to execute targetRunner
        #     in parallel (parameter parallel is the number of
        #     slaves). Default: 0.
        # --batchmode Specify how irace waits for jobs to finish when
        #     targetRunner submits jobs to a batch cluster: sge,
        #     pbs, torque, slurm or htcondor. targetRunner must
        #     submit jobs to the cluster using, for example, qsub.
        #     Default: 0.
        # --digits Maximum number of decimal places that are significant
        #     for numerical (real) parameters. Default: 4.
        # --soft-restart Enable/disable the soft restart strategy that avoids
        #     premature convergence of the probabilistic model.
        #     Default: 1.
        # --soft-restart-threshold Soft restart threshold value for numerical
        #     parameters. If NA, NULL or "", it is computed as
        #     10^-digits.
        # -e,--elitist Enable/disable elitist irace. Default: 1.
        # --elitist-new-instances Number of instances added to the execution list
        #     before previous instances in elitist irace. Default:
        #     1.
        # --elitist-limit In elitist irace, maximum number per race of
        #     elimination tests that do not eliminate a
        #     configuration. Use 0 for no limit. Default: 2.
        # --capping Enable the use of adaptive capping, a technique
        #     designed for minimizing the computation time of
        #     configurations. This is only available when elitist is
        #     active. Default: 0.
        # --capping-type Measure used to obtain the execution bound from the
        #     performance of the elite configurations: median, mean,
        #     worst, best. Default: median.
        # --bound-type Method to calculate the mean performance of elite
        #     configurations: candidate or instance. Default:
        #     candidate.
        # --bound-max Maximum execution bound for targetRunner. It must be
        #     specified when capping is enabled. Default: 0.
        # --bound-digits Precision used for calculating the execution time. It
        #     must be specified when capping is enabled. Default: 0.
        # --bound-par Penalization constant for timed out executions
        #     (executions that reach boundMax execution time).
        #     Default: 1.
        # --bound-as-timeout Replace the configuration cost of bounded executions
        #     with boundMax. Default: 1.
        # --postselection Percentage of the configuration budget used to perform
        #     a postselection race of the best configurations of
        #     each iteration after the execution of irace. Default:
        #     0.
        # --iterations Maximum number of iterations. Default: 0.
        # --experiments-per-iteration Number of runs of the target algorithm per
        #     iteration. Default: 0.
        # --min-survival Minimum number of configurations needed to continue
        #     the execution of each race (iteration). Default: 0.
        # --num-configurations Number of configurations to be sampled and evaluated
        #     at each iteration. Default: 0.
        # --confidence Confidence level for the elimination test. Default:
        #     0.95.
        super().__init__(solver, instance_set, sparkle_objectives, parent_directory)
        self.solver = solver
        self.instance_set = instance_set
        if sparkle_objectives is not None:
            if len(sparkle_objectives) > 1:
                print("WARNING: IRACE does not have multi objective support. "
                      "Only the first objective will be used.")
            self.sparkle_objective = sparkle_objectives[0]
        else:
            self.sparkle_objective = None

        self.number_of_runs = number_of_runs
        # Non-positive (or missing) budgets are stored as None, i.e. "not set"
        self.solver_calls = solver_calls if solver_calls and solver_calls > 0 else None
        self.max_time = max_time if max_time and max_time > 0 else None
        self.cutoff_time = cutoff_time
        self.budget_estimation = budget_estimation
        self.first_test = first_test
        self.mu = mu
        self.max_iterations = max_iterations

        # Pathing
        # NOTE(review): self.directory is not assigned in this class; presumably
        # it is set by ConfigurationScenario.__init__ - confirm.
        self.instance_file_path = self.directory / f"{self.instance_set.name}.txt"
        self.tmp = self.directory / "tmp"
        self.validation = self.directory / "validation"
        self.results_directory = self.directory / "results"

    def create_scenario(self: IRACEScenario) -> None:
        """Create scenario with solver and instances in the scenario directory.

        This prepares all the necessary subdirectories related to configuration.
        Removes any existing directory if it overlaps with the scenario name.
        Also writes the instance list file and the IRACE scenario file.
        """
        # Set up directories
        shutil.rmtree(self.directory, ignore_errors=True)  # Clear directory
        self.directory.mkdir(exist_ok=True, parents=True)
        for sub_directory in (self.tmp, self.validation, self.results_directory):
            sub_directory.mkdir(exist_ok=True)

        # One instance file name per line
        with self.instance_file_path.open("w+") as file:
            file.writelines(f"{instance_path.name}\n"
                            for instance_path in self.instance_set._instance_paths)
        self.create_scenario_file()

    def create_scenario_file(self: IRACEScenario) -> Path:
        """Create a file from the IRACE scenario.

        After writing, the file is checked by calling the IRACE executable with
        --check; the outcome is printed and does not raise.

        Returns:
            Path to the created file.
        """
        solver_path = self.solver.directory.absolute()
        pcs_file = self.solver.get_pcs_file(port_type="IRACE").absolute()
        forbidden_file = self.solver.get_forbidden(port_type="IRACE").absolute()
        with self.scenario_file_path.open("w") as file:
            file.write(
                f'execDir = "{self.directory.absolute()}"\n'
                'targetRunnerLauncher = "python3"\n'
                f'targetRunner = "{IRACE.configurator_target.absolute()}"\n'
                'targetRunnerLauncherArgs = "{targetRunner} '
                f"{solver_path} {self.sparkle_objective} {self.cutoff_time} "
                '{targetRunnerArgs}"\n'
                f"deterministic = {1 if self.solver.deterministic else 0}\n"
                f'parameterFile = "{pcs_file}"\n'
                f'forbiddenFile = "{forbidden_file}"\n'
                f'trainInstancesDir = "{self.instance_set.directory.absolute()}"\n'
                f'trainInstancesFile = "{self.instance_file_path.absolute()}"\n'
                "debugLevel = 1\n"  # The verbosity level of IRACE
            )
            # Exactly one budget type is written; solver calls take precedence.
            if self.solver_calls is not None:
                if self.max_time is not None:
                    print("WARNING: Both solver calls and max time specified for "
                          "scenario. This is not supported by IRACE, defaulting to "
                          "solver calls.")
                file.write(f"maxExperiments = {self.solver_calls}\n")
            elif self.max_time is not None:
                file.write(f"maxTime = {self.max_time}\n")
            else:
                print("WARNING: Neither solver calls nor max time specified. "
                      "Either budget is required for the IRACE scenario.")
            if self.max_time is not None:
                # Auto Estimate, only when no value was given and the cutoff is
                # known and smaller than the time budget
                if (self.budget_estimation is None
                        and self.cutoff_time is not None
                        and self.cutoff_time < self.max_time):
                    self.budget_estimation = self.cutoff_time / self.max_time
                # Written for estimated and for user supplied values alike
                if self.budget_estimation is not None:
                    file.write(f"budgetEstimation = {self.budget_estimation}\n")
            if self.first_test is not None:
                file.write(f"firstTest = {self.first_test}\n")
            if self.mu is not None:
                file.write(f"mu = {self.mu}\n")
            if self.max_iterations is not None:
                file.write(f"nbIterations = {self.max_iterations}\n")

        # The file must be closed (flushed) before IRACE reads it
        print("Verifying contents of IRACE scenario file and testing solver call...")
        check_file = subprocess.run(
            [f"{IRACE.configurator_executable.absolute()}",
             "-s", f"{self.scenario_file_path.absolute()}", "--check"],
            capture_output=True)
        if check_file.returncode != 0:
            # Drop the lines starting with "#" from the IRACE output
            stdout_msg = "\n".join([
                line for line in check_file.stdout.decode().splitlines()
                if not line.startswith("#")])
            print("An error occured in the IRACE scenario file:\n",
                  self.scenario_file_path.read_text(),
                  stdout_msg, "\n",
                  check_file.stderr.decode())
        else:
            print("IRACE scenario file is valid.")
        return self.scenario_file_path

    def serialize(self: IRACEScenario) -> dict:
        """Serialize the IRACE scenario.

        Returns:
            Dictionary with the numeric scenario settings. Solver, instance set,
            objective and paths are not included.
        """
        return {
            "number_of_runs": self.number_of_runs,
            "solver_calls": self.solver_calls,
            "max_time": self.max_time,
            "cutoff_time": self.cutoff_time,
            "budget_estimation": self.budget_estimation,
            "first_test": self.first_test,
            "mu": self.mu,
            "max_iterations": self.max_iterations,
        }

    @staticmethod
    def from_file(scenario_file: Path) -> IRACEScenario:
        """Reads scenario file and initialises IRACEScenario.

        Args:
            scenario_file: Path to a scenario file in the "key = value" layout
                written by create_scenario_file. A "results" directory must exist
                next to it, its entry count is used as the number of runs.

        Returns:
            The IRACEScenario described by the file.
        """
        scenario_dict = {}
        with scenario_file.open() as file:
            for line in file:
                if line.strip() == "":
                    continue
                keyvalue = line.split(" = ", maxsplit=1)
                scenario_dict[keyvalue[0]] = keyvalue[1]

        # Launcher args: "{targetRunner} <solver> <objective> <cutoff> {...}"
        _, solver_path, objective, cutoff, _ =\
            scenario_dict.pop("targetRunnerLauncherArgs").split(" ")
        instance_set_path =\
            Path(scenario_dict.pop("trainInstancesDir").strip().strip('"'))
        instance_set = Instance_Set(instance_set_path)
        solver = Solver(Path(solver_path.strip()))

        scenario_dict["sparkle_objectives"] = [resolve_objective(objective)]
        scenario_dict["cutoff_time"] = int(cutoff)
        scenario_dict["parent_directory"] = scenario_file.parent.parent
        scenario_dict["number_of_runs"] =\
            sum(1 for _ in (scenario_file.parent / "results").iterdir())

        # Keys of the file that have no matching constructor argument
        for unused_key in ("targetRunner", "execDir", "targetRunnerLauncher",
                           "deterministic", "parameterFile", "forbiddenFile",
                           "debugLevel", "trainInstancesFile"):
            scenario_dict.pop(unused_key)

        # Replace keys with scenario variable names
        for irace_key, variable_name, cast in (
                ("budgetEstimation", "budget_estimation", float),
                ("firstTest", "first_test", int),
                ("mu", "mu", int),
                ("nbIterations", "max_iterations", int),
                ("maxExperiments", "solver_calls", int),
                ("maxTime", "max_time", int)):
            if irace_key in scenario_dict:
                scenario_dict[variable_name] = cast(scenario_dict.pop(irace_key))

        return IRACEScenario(solver, instance_set, **scenario_dict)