Coverage for sparkle/configurator/implementations/irace.py: 48%

178 statements  

« prev     ^ index     » next       coverage.py v7.8.0, created at 2025-04-03 10:42 +0000

1"""Configurator classes to implement IRACE in Sparkle.""" 

2from __future__ import annotations 

3import shutil 

4import subprocess 

5from pathlib import Path 

6 

7from sparkle.configurator.configurator import Configurator, ConfigurationScenario 

8from sparkle.solver import Solver 

9from sparkle.structures import PerformanceDataFrame, FeatureDataFrame 

10from sparkle.instance import InstanceSet, Instance_Set 

11from sparkle.types import SparkleObjective, resolve_objective 

12 

13import runrunner as rrr 

14from runrunner import Runner, Run 

15 

16 

class IRACE(Configurator):
    """Class for IRACE configurator."""

    # Locations of the bundled IRACE component, relative to this package
    configurator_path = Path(__file__).parent.parent.parent.resolve() /\
        "Components/irace-v4.2.0"
    configurator_package = configurator_path / "irace_4.2.0.tar"
    # NOTE: There are possible dependencies that we do not install here.
    # TODO: Determine if we should add them or not.
    package_dependencies = ["codetools_0.2-20.tar", "data.table_1.16.4.tar",
                            "matrixStats_1.5.0.tar", "spacefillr_0.3.3.tar"]
    configurator_executable = configurator_path / "irace" / "bin" / "irace"
    configurator_ablation_executable = configurator_path / "irace" / "bin" / "ablation"
    configurator_target = configurator_path / "irace_target_algorithm.py"

    # NOTE(review): the bundled package paths above refer to irace 4.2.0 while
    # this string says 3.5 - confirm which version should be reported.
    version = "3.5"
    full_name = "Iterated Racing for Automatic Algorithm Configuration"

    def __init__(self: IRACE,
                 output_path: Path,
                 base_dir: Path,
                 ) -> None:
        """Initialize IRACE configurator.

        Args:
            output_path: Directory in which a subdirectory named after this class
                is created to hold the configurator output.
            base_dir: Passed on unchanged to the Configurator base class.
        """
        output_path = output_path / IRACE.__name__
        output_path.mkdir(parents=True, exist_ok=True)
        super().__init__(output_path=output_path,
                         base_dir=base_dir,
                         tmp_path=output_path / "tmp",
                         multi_objective_support=False)

    @property
    def name(self: IRACE) -> str:
        """Returns the name of the configurator."""
        return IRACE.__name__

    @staticmethod
    def scenario_class() -> type[ConfigurationScenario]:
        """Returns the IRACE scenario class."""
        return IRACEScenario

    def configure(self: IRACE,
                  scenario: ConfigurationScenario,
                  data_target: PerformanceDataFrame,
                  validate_after: bool = True,
                  sbatch_options: list[str] = None,
                  slurm_prepend: str | list[str] | Path = None,
                  num_parallel_jobs: int = None,
                  base_dir: Path = None,
                  run_on: Runner = Runner.SLURM) -> list[Run]:
        """Start configuration job.

        Args:
            scenario: ConfigurationScenario to execute.
            data_target: PerformanceDataFrame where to store the found configurations
            validate_after: Whether to validate the configuration on the training set
                afterwards or not.
            sbatch_options: List of slurm batch options to use. Defaults to no options.
            slurm_prepend: Slurm script to prepend to the sbatch
            num_parallel_jobs: The maximum number of jobs to run in parallel
            base_dir: The base_dir of RunRunner where the sbatch scripts will be placed
            run_on: On which platform to run the jobs. Default: Slurm.

        Returns:
            A list of RunRunner Run objects: the configuration run, followed by the
            validation run when validate_after is set.
        """
        # A fresh list per call, so no state is shared through a mutable default
        sbatch_options = [] if sbatch_options is None else sbatch_options
        scenario.create_scenario()
        # The validation directory is used as log_dir for the validation run below
        scenario.validation.mkdir(exist_ok=True, parents=True)

        # Create command to call IRACE. Create plural based on number of runs var
        # We set the seed over the last n run ids in the dataframe
        seeds = data_target.run_ids[data_target.num_runs - scenario.number_of_runs:]
        results_dir = scenario.results_directory.absolute()
        cmds = [f"python3 {Configurator.configurator_cli_path.absolute()} "
                f"{IRACE.__name__} {log_file} {data_target.csv_filepath} "
                f"{scenario.scenario_file_path} {seed} "
                f"{IRACE.configurator_executable.absolute()} "
                f"--scenario {scenario.scenario_file_path} "
                f"--log-file {log_file} "
                f"--seed {seed}"
                for seed, log_file in (
                    (seed, results_dir / f"output_{seed}.Rdata") for seed in seeds)]
        runs = [rrr.add_to_queue(
            runner=run_on,
            cmd=cmds,
            base_dir=base_dir,
            name=f"{self.name}: {scenario.solver.name} on {scenario.instance_set.name}",
            parallel_jobs=num_parallel_jobs,
            sbatch_options=sbatch_options,
            prepend=slurm_prepend,
        )]

        if validate_after:
            # The validation depends on the configuration run having finished
            validate = scenario.solver.run_performance_dataframe(
                scenario.instance_set,
                run_ids=seeds,
                performance_dataframe=data_target,
                cutoff_time=scenario.cutoff_time,
                run_on=run_on,
                sbatch_options=sbatch_options,
                log_dir=scenario.validation,
                base_dir=base_dir,
                dependencies=runs,
                slurm_prepend=slurm_prepend
            )
            runs.append(validate)

        if run_on == Runner.LOCAL:
            for run in runs:
                run.wait()

        return runs

    @staticmethod
    def organise_output(output_source: Path,
                        output_target: Path,
                        scenario: IRACEScenario,
                        run_id: int) -> None | dict:
        """Method to restructure and clean up after a single configurator call.

        Args:
            output_source: The IRACE .Rdata log file to extract the configuration from.
            output_target: CSV file of the PerformanceDataFrame to write the
                configuration to. If None or non-existent, nothing is written.
            scenario: The scenario that was configured.
            run_id: The run id under which the configuration is stored.

        Returns:
            The configuration as a dictionary if there is no output target to
            write to, otherwise None.

        Raises:
            RuntimeError: If Rscript fails or yields no output for output_source.
        """
        from filelock import FileLock
        # Ask R to print the elite configuration of the last iteration
        get_config = subprocess.run(
            ["Rscript", "-e",
             'library("irace"); '
             f'load("{output_source}"); '
             "last <- length(iraceResults$iterationElites); "
             "id <- iraceResults$iterationElites[last]; "
             "print(getConfigurationById(iraceResults, ids = id))"],
            capture_output=True)
        r_table = get_config.stdout.decode()
        if get_config.returncode != 0 or r_table.strip() == "":
            raise RuntimeError("Failed to get configuration from IRACE file "
                               f"{output_source}:\n"
                               f"{r_table}\n"
                               f"{get_config.stderr.decode()}")

        # Join the table header and content together
        header_parts, content_parts = [], []
        for i, line in enumerate(r_table.splitlines()):
            if i % 2 == 0:  # Even lines are headers
                header_parts.append(line)
            else:  # Odd lines are parameter values
                # First element is the ID
                content_parts.append(" ".join(line.split(" ")[1:]))
        # First header item is the ID
        header = [x for x in "".join(header_parts).split(" ") if x != ""][1:]
        content = [x for x in "".join(content_parts).split(" ") if x != ""][1:]
        # Skip the parent column and any parameter without a value
        configuration = "".join(
            f"--{parameter} {value} "
            for parameter, value in zip(header, content)
            if parameter != ".PARENT." and value not in ("NA", "<NA>"))
        configuration = Solver.config_str_to_dict(configuration)
        if output_target is None or not output_target.exists():
            return configuration

        time_stamp = scenario.scenario_file_path.stat().st_mtime
        configuration["configuration_id"] =\
            f"{IRACE.__name__}_{time_stamp}_{run_id}"
        instance_names = scenario.instance_set.instance_names
        # Hold a file lock while the CSV is loaded, modified and saved
        lock = FileLock(f"{output_target}.lock")
        with lock.acquire(timeout=60):
            performance_data = PerformanceDataFrame(output_target)
            # Resolve absolute path to Solver column
            solver = [s for s in performance_data.solvers
                      if Path(s).name == scenario.solver.name][0]
            # For some reason the instance paths in the instance set are absolute
            instances = [instance for instance in performance_data.instances
                         if Path(instance).name in instance_names]
            # We don't set the seed in the dataframe, as that should be part of the conf
            performance_data.set_value(
                value=[str(configuration)],
                solver=solver,
                instance=instances,
                objective=None,
                run=run_id,
                solver_fields=[PerformanceDataFrame.column_configuration]
            )
            performance_data.save_csv()

    def get_status_from_logs(self: IRACE) -> None:
        """Method to scan the log files of the configurator for warnings."""
        raise NotImplementedError

197 

198 

class IRACEScenario(ConfigurationScenario):
    """Class for IRACE scenario."""

    def __init__(self: IRACEScenario,
                 solver: Solver,
                 instance_set: InstanceSet,
                 sparkle_objectives: list[SparkleObjective],
                 parent_directory: Path,
                 number_of_runs: int = None, solver_calls: int = None,
                 cutoff_time: int = None,
                 max_time: int = None,
                 budget_estimation: float = None,
                 first_test: int = None,
                 mu: int = None,
                 max_iterations: int = None,
                 feature_data: FeatureDataFrame = None,
                 )\
            -> None:
        """Initialize scenario paths and names.

        Args:
            solver: Solver that should be configured.
            instance_set: Instances object for the scenario.
            sparkle_objectives: SparkleObjectives used for each run of the configuration.
                Will be simplified to the first objective.
            parent_directory: Path where the scenario files will be placed.
            number_of_runs: The number of configurator runs to perform
                for configuring the solver.
            solver_calls: The number of times the solver is called for each
                configuration run. [MaxExperiments]
            cutoff_time: The maximum time allowed for each individual run during
                configuration.
            max_time: The time budget (CPU) allocated for the sum of solver calls
                done by the configurator in seconds. [MaxTime]
            budget_estimation: Fraction (smaller than 1) of the budget used to estimate
                the mean computation time of a configuration. Only used when maxTime > 0.
                Default: Computed as cutoff_time / max_time. [BudgetEstimation]
            first_test: Specifies how many instances are evaluated before the first
                elimination test. IRACE Default: 5. [firstTest]
            mu: Parameter used to define the number of configurations sampled and
                evaluated at each iteration. IRACE Default: 5. [mu]
            max_iterations: Maximum number of iterations to be executed. Each iteration
                involves the generation of new configurations and the use of racing to
                select the best configurations. By default (with 0), irace calculates a
                minimum number of iterations as N^iter = ⌊2 + log2 N^param⌋, where
                N^param is the number of non-fixed parameters to be tuned.
                Setting this parameter may make irace stop sooner than it should without
                using all the available budget. We recommend to use the default value.
            feature_data: FeatureDataFrame object with the feature data.
                Currently not supported by IRACE.
        """
        # Other possible arguments that are not added yet to Sparkle:
        # --test-num-elites  Number of elite configurations returned by irace that
        #     will be tested if test instances are provided. Default: 1.
        # --test-iteration-elites  Enable/disable testing the elite configurations
        #     found at each iteration. Default: 0.
        # --test-type  Statistical test used for elimination. The default value
        #     selects t-test if capping is enabled or F-test, otherwise. Valid
        #     values are: F-test (Friedman test), t-test (pairwise t-tests with no
        #     correction), t-test-bonferroni (t-test with Bonferroni's correction
        #     for multiple comparisons), t-test-holm (t-test with Holm's correction
        #     for multiple comparisons).
        # --each-test  Number of instances evaluated between elimination tests.
        #     Default: 1.
        # --load-balancing  Enable/disable load-balancing when executing
        #     experiments in parallel. Load-balancing makes better use of computing
        #     resources, but increases communication overhead. If this overhead is
        #     large, disabling load-balancing may be faster. Default: 1.
        # --mpi  Enable/disable MPI. Use Rmpi to execute targetRunner in parallel
        #     (parameter parallel is the number of slaves). Default: 0.
        # --batchmode  Specify how irace waits for jobs to finish when targetRunner
        #     submits jobs to a batch cluster: sge, pbs, torque, slurm or htcondor.
        #     targetRunner must submit jobs to the cluster using, for example,
        #     qsub. Default: 0.
        # --digits  Maximum number of decimal places that are significant for
        #     numerical (real) parameters. Default: 4.
        # --soft-restart  Enable/disable the soft restart strategy that avoids
        #     premature convergence of the probabilistic model. Default: 1.
        # --soft-restart-threshold  Soft restart threshold value for numerical
        #     parameters. If NA, NULL or "", it is computed as 10^-digits.
        # -e,--elitist  Enable/disable elitist irace. Default: 1.
        # --elitist-new-instances  Number of instances added to the execution list
        #     before previous instances in elitist irace. Default: 1.
        # --elitist-limit  In elitist irace, maximum number per race of elimination
        #     tests that do not eliminate a configuration. Use 0 for no limit.
        #     Default: 2.
        # --capping  Enable the use of adaptive capping, a technique designed for
        #     minimizing the computation time of configurations. This is only
        #     available when elitist is active. Default: 0.
        # --capping-type  Measure used to obtain the execution bound from the
        #     performance of the elite configurations: median, mean, worst, best.
        #     Default: median.
        # --bound-type  Method to calculate the mean performance of elite
        #     configurations: candidate or instance. Default: candidate.
        # --bound-max  Maximum execution bound for targetRunner. It must be
        #     specified when capping is enabled. Default: 0.
        # --bound-digits  Precision used for calculating the execution time. It
        #     must be specified when capping is enabled. Default: 0.
        # --bound-par  Penalization constant for timed out executions (executions
        #     that reach boundMax execution time). Default: 1.
        # --bound-as-timeout  Replace the configuration cost of bounded executions
        #     with boundMax. Default: 1.
        # --postselection  Percentage of the configuration budget used to perform a
        #     postselection race of the best configurations of each iteration after
        #     the execution of irace. Default: 0.
        # --iterations  Maximum number of iterations. Default: 0.
        # --experiments-per-iteration  Number of runs of the target algorithm per
        #     iteration. Default: 0.
        # --min-survival  Minimum number of configurations needed to continue the
        #     execution of each race (iteration). Default: 0.
        # --num-configurations  Number of configurations to be sampled and
        #     evaluated at each iteration. Default: 0.
        # --confidence  Confidence level for the elimination test. Default: 0.95.
        super().__init__(solver, instance_set, sparkle_objectives, parent_directory)
        self.solver = solver
        self.instance_set = instance_set
        if sparkle_objectives is not None:
            self.sparkle_objective = sparkle_objectives[0]
        else:
            self.sparkle_objective = None

        if feature_data is not None:
            print("WARNING: Instance features currently not supported by IRACE.")

        self.number_of_runs = number_of_runs
        # Non-positive or missing budgets are stored as None (not specified)
        self.solver_calls = solver_calls if solver_calls and solver_calls > 0 else None
        self.max_time = max_time if max_time and max_time > 0 else None
        self.cutoff_time = cutoff_time
        self.budget_estimation = budget_estimation
        self.first_test = first_test
        self.mu = mu
        self.max_iterations = max_iterations

        # Pathing
        self.instance_file_path = self.directory / f"{self.instance_set.name}.txt"
        self.tmp = self.directory / "tmp"
        self.validation = self.directory / "validation"
        self.results_directory = self.directory / "results"

    def create_scenario(self: IRACEScenario) -> None:
        """Create the scenario directory structure, instance file and scenario file.

        This prepares all the necessary subdirectories related to configuration.
        Removes the existing scenario directory, if any, before recreating it.
        """
        # Set up directories
        shutil.rmtree(self.directory, ignore_errors=True)  # Clear directory
        self.directory.mkdir(exist_ok=True, parents=True)
        self.tmp.mkdir(exist_ok=True)
        self.validation.mkdir(exist_ok=True)
        self.results_directory.mkdir(exist_ok=True)

        # One instance file name per line; the scenario file refers to this file
        with self.instance_file_path.open("w") as file:
            for instance_path in self.instance_set._instance_paths:
                file.write(f"{instance_path.name}\n")
        self.create_scenario_file()

    def create_scenario_file(self: IRACEScenario) -> Path | None:
        """Create a file from the IRACE scenario.

        After writing, the file is verified by calling the IRACE executable
        with the --check option.

        Returns:
            Path to the created file, or None if IRACE reports the file as invalid.
        """
        from sparkle.tools.parameters import PCSConvention
        solver_path = self.solver.directory.absolute()
        pcs_path = self.solver.get_pcs_file(port_type=PCSConvention.IRACE).absolute()
        with self.scenario_file_path.open("w") as file:
            file.write(
                f'execDir = "{self.directory.absolute()}"\n'
                'targetRunnerLauncher = "python3"\n'
                f'targetRunner = "{IRACE.configurator_target.absolute()}"\n'
                'targetCmdline = "{targetRunner} '
                f"{solver_path} {self.sparkle_objective} {self.cutoff_time} "
                '{configurationID} {instanceID} {seed} {instance} {targetRunnerArgs}"\n'
                f"deterministic = {1 if self.solver.deterministic else 0}\n"
                f'parameterFile = "{pcs_path.absolute()}"\n'
                f'trainInstancesDir = "{self.instance_set.directory.absolute()}"\n'
                f'trainInstancesFile = "{self.instance_file_path.absolute()}"\n'
                "debugLevel = 1\n"  # The verbosity level of IRACE
            )
            # Only one budget type is written; solver calls take precedence
            if self.solver_calls is not None:
                file.write(f"maxExperiments = {self.solver_calls}\n")
            elif self.max_time is not None:
                file.write(f"maxTime = {self.max_time}\n")
            if self.solver_calls is not None and self.max_time is not None:
                print("WARNING: Both solver calls and max time specified for scenario. "
                      "This is not supported by IRACE, defaulting to solver calls.")
            elif self.solver_calls is None and self.max_time is None:
                print("WARNING: Neither solver calls nor max time specified. "
                      "Either budget is required for the IRACE scenario.")
            if self.max_time is not None:
                if (self.budget_estimation is None
                        and self.cutoff_time is not None
                        and self.cutoff_time < self.max_time):
                    # Auto Estimate
                    self.budget_estimation = self.cutoff_time / self.max_time
                # Write the estimate whether it was given by the user or computed
                if self.budget_estimation is not None:
                    file.write(f"budgetEstimation = {self.budget_estimation}\n")
            if self.first_test is not None:
                file.write(f"firstTest = {self.first_test}\n")
            if self.mu is not None:
                file.write(f"mu = {self.mu}\n")
            if self.max_iterations is not None:
                file.write(f"nbIterations = {self.max_iterations}\n")
        print("Verifying contents of IRACE scenario file and testing solver call...")
        check_file = subprocess.run(
            [f"{IRACE.configurator_executable.absolute()}",
             "-s", f"{self.scenario_file_path.absolute()}", "--check"],
            capture_output=True)
        if check_file.returncode != 0:
            # Lines starting with '#' are left out of the reported output
            stdout_msg = "\n".join([
                line for line in check_file.stdout.decode().splitlines()
                if not line.startswith("#")])
            print("An error occured in the IRACE scenario file:\n",
                  self.scenario_file_path.read_text(),
                  stdout_msg, "\n",
                  check_file.stderr.decode())
            return None
        print("IRACE scenario file is valid.")
        return self.scenario_file_path

    def serialize(self: IRACEScenario) -> dict:
        """Serialize the IRACE scenario."""
        return {
            "number_of_runs": self.number_of_runs,
            "solver_calls": self.solver_calls,
            "max_time": self.max_time,
            "cutoff_time": self.cutoff_time,
            "budget_estimation": self.budget_estimation,
            "first_test": self.first_test,
            "mu": self.mu,
            "max_iterations": self.max_iterations,
        }

    @staticmethod
    def from_file(scenario_file: Path) -> IRACEScenario:
        """Reads scenario file and initalises IRACEScenario.

        Args:
            scenario_file: Path to a scenario file with 'key = value' lines, as
                written by create_scenario_file.

        Returns:
            The IRACEScenario described by the file. The number of runs is taken
            from the number of entries in the 'results' directory next to the file.
        """
        with scenario_file.open() as file:
            scenario_dict = {keyvalue[0]: keyvalue[1]
                             for keyvalue in (line.split(" = ", maxsplit=1)
                                              for line in file
                                              if line.strip() != "")}
        # The command line holds, in order: the target runner placeholder, the
        # solver path, the objective, the cutoff time and five IRACE placeholders
        _, solver_path, objective, cutoff, _, _, _, _, _ =\
            scenario_dict.pop("targetCmdline").split(" ")
        scenario_dict["sparkle_objectives"] = [resolve_objective(objective)]
        scenario_dict["cutoff_time"] = int(cutoff)
        scenario_dict["parent_directory"] = scenario_file.parent.parent
        scenario_dict["number_of_runs"] =\
            len(list((scenario_file.parent / "results").iterdir()))
        # These entries are not arguments of the constructor
        for key in ("targetRunner", "execDir", "targetRunnerLauncher",
                    "deterministic", "parameterFile", "debugLevel"):
            scenario_dict.pop(key)
        instance_set_path =\
            Path(scenario_dict.pop("trainInstancesDir").strip().strip('"'))
        instance_set = Instance_Set(instance_set_path)
        solver = Solver(Path(solver_path.strip()))
        scenario_dict.pop("trainInstancesFile")
        # Replace keys with scenario variable names
        if "budgetEstimation" in scenario_dict:
            scenario_dict["budget_estimation"] =\
                float(scenario_dict.pop("budgetEstimation"))
        if "firstTest" in scenario_dict:
            scenario_dict["first_test"] = int(scenario_dict.pop("firstTest"))
        if "mu" in scenario_dict:
            scenario_dict["mu"] = int(scenario_dict.pop("mu"))
        if "nbIterations" in scenario_dict:
            scenario_dict["max_iterations"] = int(scenario_dict.pop("nbIterations"))
        if "maxExperiments" in scenario_dict:
            scenario_dict["solver_calls"] = int(scenario_dict.pop("maxExperiments"))
        if "maxTime" in scenario_dict:
            scenario_dict["max_time"] = int(scenario_dict.pop("maxTime"))

        return IRACEScenario(solver, instance_set, **scenario_dict)