Coverage for sparkle/configurator/implementations/irace.py: 50%

170 statements  

« prev     ^ index     » next       coverage.py v7.8.0, created at 2025-04-13 10:34 +0000

1"""Configurator classes to implement IRACE in Sparkle.""" 

2from __future__ import annotations 

3import shutil 

4import subprocess 

5from pathlib import Path 

6 

7from sparkle.configurator.configurator import Configurator, ConfigurationScenario 

8from sparkle.solver import Solver 

9from sparkle.structures import PerformanceDataFrame, FeatureDataFrame 

10from sparkle.instance import InstanceSet, Instance_Set 

11from sparkle.types import SparkleObjective, resolve_objective 

12 

13from runrunner import Runner, Run 

14 

15 

class IRACE(Configurator):
    """Class for IRACE configurator.

    Builds the command lines that start irace configuration runs and converts
    the ``.Rdata`` output of a run into a Sparkle configuration.
    """

    configurator_path = Path(__file__).parent.parent.parent.resolve() /\
        "Components/irace-v4.2.0"
    configurator_package = configurator_path / "irace_4.2.0.tar"
    # NOTE: There are possible dependencies that we do not install here.
    # TODO: Determine if we should add them or not.
    package_dependencies = ["codetools_0.2-20.tar", "data.table_1.16.4.tar",
                            "matrixStats_1.5.0.tar", "spacefillr_0.3.3.tar"]
    configurator_executable = configurator_path / "irace" / "bin" / "irace"
    configurator_ablation_executable = configurator_path / "irace" / "bin" / "ablation"
    configurator_target = configurator_path / "irace_target_algorithm.py"

    # NOTE(review): the package paths above point at irace 4.2.0 while this
    # string says "3.5" - confirm whether the version is stale.
    version = "3.5"
    full_name = "Iterated Racing for Automatic Algorithm Configuration"

    def __init__(self: IRACE,
                 output_path: Path,
                 base_dir: Path,
                 ) -> None:
        """Initialize IRACE configurator.

        Args:
            output_path: Parent directory of the output. A subdirectory named
                after this class is created in it and used as output path.
            base_dir: Passed on unchanged to the Configurator base class.
        """
        output_path = output_path / IRACE.__name__
        output_path.mkdir(parents=True, exist_ok=True)
        super().__init__(output_path=output_path,
                         base_dir=base_dir,
                         tmp_path=output_path / "tmp",
                         multi_objective_support=False)

    @property
    def name(self: IRACE) -> str:
        """Returns the name of the configurator."""
        return IRACE.__name__

    @staticmethod
    def scenario_class() -> type[ConfigurationScenario]:
        """Returns the IRACE scenario class."""
        return IRACEScenario

    def configure(self: IRACE,
                  scenario: ConfigurationScenario,
                  data_target: PerformanceDataFrame,
                  validate_after: bool = True,
                  sbatch_options: list[str] = None,
                  slurm_prepend: str | list[str] | Path = None,
                  num_parallel_jobs: int = None,
                  base_dir: Path = None,
                  run_on: Runner = Runner.SLURM) -> Run:
        """Start configuration job.

        Args:
            scenario: ConfigurationScenario to execute.
            data_target: PerformanceDataFrame where to store the found configurations
            validate_after: Whether to validate the configuration on the training set
                afterwards or not.
            sbatch_options: List of slurm batch options to use. Defaults to an
                empty list.
            slurm_prepend: Slurm script to prepend to the sbatch
            num_parallel_jobs: The maximum number of jobs to run in parallel
            base_dir: The base_dir of RunRunner where the sbatch scripts will be placed
            run_on: On which platform to run the jobs. Default: Slurm.

        Returns:
            A RunRunner Run object.
        """
        # A fresh list per call: a literal [] default would be shared by all calls
        if sbatch_options is None:
            sbatch_options = []
        scenario.create_scenario()
        # Directory in which the validation "configurations.csv" is placed
        scenario.validation.mkdir(exist_ok=True, parents=True)

        # Create command to call IRACE. Create plural based on number of runs var
        # We set the seed over the last n run ids in the dataframe
        seeds = data_target.run_ids[data_target.num_runs - scenario.number_of_runs:]
        output_files = [
            scenario.results_directory.absolute() / f"output_{job_idx}.Rdata"
            for job_idx in seeds]
        cmds = [f"python3 {Configurator.configurator_cli_path.absolute()} "
                f"{IRACE.__name__} {output_file} {data_target.csv_filepath} "
                f"{scenario.scenario_file_path} {seed} "
                f"{IRACE.configurator_executable.absolute()} "
                f"--scenario {scenario.scenario_file_path} "
                f"--log-file {output_file} "
                f"--seed {seed}" for seed, output_file in zip(seeds, output_files)]
        return super().configure(
            configuration_commands=cmds,
            data_target=data_target,
            output=output_files,
            scenario=scenario,
            sbatch_options=sbatch_options,
            slurm_prepend=slurm_prepend,
            validation_ids=seeds if validate_after else None,
            num_parallel_jobs=num_parallel_jobs,
            base_dir=base_dir,
            run_on=run_on
        )

    @staticmethod
    def _configuration_from_r_table(r_table: str) -> str:
        """Convert a printed single-row R data frame into a configuration string.

        R wraps wide data frames into several header/value line pairs. The
        tokens are collected per line, so that the last value of one chunk can
        never be glued to the first value of the next chunk.

        Args:
            r_table: The text printed by R for the configuration.

        Returns:
            The configuration as "--parameter value " pairs.
        """
        header, values = [], []
        for i, line in enumerate(r_table.splitlines()):
            tokens = line.split()
            if i & 1 == 0:  # Even lines are headers
                header.extend(tokens)
            else:  # Odd lines are parameter values
                # First element is the row name, which R repeats for each chunk
                values.extend(tokens[1:])
        # The first column is the configuration ID, which is not a parameter
        return "".join(
            f"--{parameter} {value} "
            for parameter, value in zip(header[1:], values[1:])
            if parameter != ".PARENT." and value != "NA" and value != "<NA>")

    @staticmethod
    def organise_output(output_source: Path,
                        output_target: Path,
                        scenario: IRACEScenario,
                        run_id: int) -> None | dict:
        """Method to restructure and clean up after a single configurator call.

        Args:
            output_source: The .Rdata file written by the IRACE run.
            output_target: The PerformanceDataFrame csv file to write the
                configuration to.
            scenario: The scenario of the configuration run.
            run_id: The run of the PerformanceDataFrame to write to.

        Returns:
            The configuration as dictionary if output_target is None or does
            not exist. Otherwise the configuration is stored in output_target
            and None is returned.

        Raises:
            RuntimeError: If Rscript fails or does not print a configuration.
        """
        from filelock import FileLock
        # Let R print the elite configuration of the last iteration
        get_config = subprocess.run(
            ["Rscript", "-e",
             'library("irace"); '
             f'load("{output_source}"); '
             "last <- length(iraceResults$iterationElites); "
             "id <- iraceResults$iterationElites[last]; "
             "print(getConfigurationById(iraceResults, ids = id))"],
            capture_output=True)
        r_table = get_config.stdout.decode()
        if get_config.returncode != 0 or r_table.strip() == "":
            raise RuntimeError("Failed to get configuration from IRACE file "
                               f"{output_source}:\n"
                               f"{get_config.stdout.decode()}\n"
                               f"{get_config.stderr.decode()}")

        configuration = Solver.config_str_to_dict(
            IRACE._configuration_from_r_table(r_table))
        if output_target is None or not output_target.exists():
            return configuration

        time_stamp = scenario.scenario_file_path.stat().st_mtime
        configuration["configuration_id"] =\
            f"{IRACE.__name__}_{time_stamp}_{run_id}"
        instance_names = scenario.instance_set.instance_names
        lock = FileLock(f"{output_target}.lock")
        # The csv is loaded, changed and saved while holding the file lock
        with lock.acquire(timeout=60):
            performance_data = PerformanceDataFrame(output_target)
            # Resolve absolute path to Solver column
            solver = [s for s in performance_data.solvers
                      if Path(s).name == scenario.solver.name][0]
            # For some reason the instance paths in the instance set are absolute
            instances = [instance for instance in performance_data.instances
                         if Path(instance).name in instance_names]
            # We don't set the seed in the dataframe, as that should be part of the conf
            performance_data.set_value(
                value=[str(configuration)],
                solver=solver,
                instance=instances,
                objective=None,
                run=run_id,
                solver_fields=[PerformanceDataFrame.column_configuration]
            )
            performance_data.save_csv()

    def get_status_from_logs(self: IRACE) -> None:
        """Method to scan the log files of the configurator for warnings."""
        raise NotImplementedError

179 

180 

class IRACEScenario(ConfigurationScenario):
    """Class for IRACE scenario."""

    def __init__(self: IRACEScenario,
                 solver: Solver,
                 instance_set: InstanceSet,
                 sparkle_objectives: list[SparkleObjective],
                 parent_directory: Path,
                 number_of_runs: int = None, solver_calls: int = None,
                 cutoff_time: int = None,
                 max_time: int = None,
                 budget_estimation: float = None,
                 first_test: int = None,
                 mu: int = None,
                 max_iterations: int = None,
                 feature_data: FeatureDataFrame = None,
                 )\
            -> None:
        """Initialize scenario paths and names.

        Args:
            solver: Solver that should be configured.
            instance_set: Instances object for the scenario.
            sparkle_objectives: SparkleObjectives used for each run of the configuration.
                Will be simplified to the first objective.
            parent_directory: Path where the scenario files will be placed.
            number_of_runs: The number of configurator runs to perform
                for configuring the solver.
            solver_calls: The number of times the solver is called for each
                configuration run. [MaxExperiments]
            cutoff_time: The maximum time allowed for each individual run during
                configuration.
            max_time: The time budget (CPU) allocated for the sum of solver calls
                done by the configurator in seconds. [MaxTime]
            budget_estimation: Fraction (smaller than 1) of the budget used to estimate
                the mean computation time of a configuration. Only used when maxTime > 0.
                Default: Computed as cutoff_time / max_time. [BudgetEstimation]
            first_test: Specifies how many instances are evaluated before the first
                elimination test. IRACE Default: 5. [firstTest]
            mu: Parameter used to define the number of configurations sampled and
                evaluated at each iteration. IRACE Default: 5. [mu]
            max_iterations: Maximum number of iterations to be executed. Each iteration
                involves the generation of new configurations and the use of racing to
                select the best configurations. By default (with 0), irace calculates a
                minimum number of iterations as N^iter = ⌊2 + log2 N param⌋, where
                N^param is the number of non-fixed parameters to be tuned.
                Setting this parameter may make irace stop sooner than it should without
                using all the available budget. We recommend to use the default value.
            feature_data: FeatureDataFrame object with the feature data.
                Currently not supported by IRACE.
        """
        # Other possible arguments that are not added yet to Sparkle:
        # --test-num-elites: Number of elite configurations returned by irace that
        #     will be tested if test instances are provided. Default: 1.
        # --test-iteration-elites: Enable/disable testing the elite configurations
        #     found at each iteration. Default: 0.
        # --test-type: Statistical test used for elimination. The default value
        #     selects t-test if capping is enabled or F-test, otherwise. Valid
        #     values are: F-test (Friedman test), t-test (pairwise t-tests with no
        #     correction), t-test-bonferroni (t-test with Bonferroni's correction
        #     for multiple comparisons), t-test-holm (t-test with Holm's correction
        #     for multiple comparisons).
        # --each-test: Number of instances evaluated between elimination tests.
        #     Default: 1.
        # --load-balancing: Enable/disable load-balancing when executing
        #     experiments in parallel. Load-balancing makes better use of computing
        #     resources, but increases communication overhead. If this overhead is
        #     large, disabling load-balancing may be faster. Default: 1.
        # --mpi: Enable/disable MPI. Use Rmpi to execute targetRunner in parallel
        #     (parameter parallel is the number of slaves). Default: 0.
        # --batchmode: Specify how irace waits for jobs to finish when targetRunner
        #     submits jobs to a batch cluster: sge, pbs, torque, slurm or htcondor.
        #     targetRunner must submit jobs to the cluster using, for example,
        #     qsub. Default: 0.
        # --digits: Maximum number of decimal places that are significant for
        #     numerical (real) parameters. Default: 4.
        # --soft-restart: Enable/disable the soft restart strategy that avoids
        #     premature convergence of the probabilistic model. Default: 1.
        # --soft-restart-threshold: Soft restart threshold value for numerical
        #     parameters. If NA, NULL or "", it is computed as 10^-digits.
        # -e,--elitist: Enable/disable elitist irace. Default: 1.
        # --elitist-new-instances: Number of instances added to the execution list
        #     before previous instances in elitist irace. Default: 1.
        # --elitist-limit: In elitist irace, maximum number per race of elimination
        #     tests that do not eliminate a configuration. Use 0 for no limit.
        #     Default: 2.
        # --capping: Enable the use of adaptive capping, a technique designed for
        #     minimizing the computation time of configurations. This is only
        #     available when elitist is active. Default: 0.
        # --capping-type: Measure used to obtain the execution bound from the
        #     performance of the elite configurations: median, mean, worst, best.
        #     Default: median.
        # --bound-type: Method to calculate the mean performance of elite
        #     configurations: candidate or instance. Default: candidate.
        # --bound-max: Maximum execution bound for targetRunner. It must be
        #     specified when capping is enabled. Default: 0.
        # --bound-digits: Precision used for calculating the execution time. It
        #     must be specified when capping is enabled. Default: 0.
        # --bound-par: Penalization constant for timed out executions (executions
        #     that reach boundMax execution time). Default: 1.
        # --bound-as-timeout: Replace the configuration cost of bounded executions
        #     with boundMax. Default: 1.
        # --postselection: Percentage of the configuration budget used to perform
        #     a postselection race of the best configurations of each iteration
        #     after the execution of irace. Default: 0.
        # --iterations: Maximum number of iterations. Default: 0.
        # --experiments-per-iteration: Number of runs of the target algorithm per
        #     iteration. Default: 0.
        # --min-survival: Minimum number of configurations needed to continue the
        #     execution of each race (iteration). Default: 0.
        # --num-configurations: Number of configurations to be sampled and
        #     evaluated at each iteration. Default: 0.
        # --confidence: Confidence level for the elimination test. Default: 0.95.
        super().__init__(solver, instance_set, sparkle_objectives, parent_directory)
        self.solver = solver
        self.instance_set = instance_set
        if sparkle_objectives is not None:
            self.sparkle_objective = sparkle_objectives[0]
        else:
            self.sparkle_objective = None

        if feature_data is not None:
            print("WARNING: Instance features currently not supported by IRACE.")

        self.number_of_runs = number_of_runs
        # Budgets that are not positive are treated as not specified
        self.solver_calls = solver_calls if solver_calls and solver_calls > 0 else None
        self.max_time = max_time if max_time and max_time > 0 else None
        self.cutoff_time = cutoff_time
        self.budget_estimation = budget_estimation
        self.first_test = first_test
        self.mu = mu
        self.max_iterations = max_iterations

        # Pathing
        self.instance_file_path = self.directory / f"{self.instance_set.name}.txt"
        self.tmp = self.directory / "tmp"
        self.validation = self.directory / "validation"
        self.results_directory = self.directory / "results"

    def create_scenario(self: IRACEScenario) -> None:
        """Create scenario with solver and instances in the parent directory.

        This prepares all the necessary subdirectories related to configuration.
        Removes any existing directory if it overlaps with the scenario name.
        Afterwards the instance list and the scenario file are written.
        """
        # Set up directories
        shutil.rmtree(self.directory, ignore_errors=True)  # Clear directory
        self.directory.mkdir(exist_ok=True, parents=True)
        self.tmp.mkdir(exist_ok=True)
        self.validation.mkdir(exist_ok=True)
        self.results_directory.mkdir(exist_ok=True)

        # One instance file name per line, used as trainInstancesFile
        with self.instance_file_path.open("w+") as file:
            for instance_path in self.instance_set._instance_paths:
                file.write(f"{instance_path.name}\n")
        self.create_scenario_file()

    def create_scenario_file(self: IRACEScenario) -> Path | None:
        """Create a file from the IRACE scenario.

        The written file is verified by calling the IRACE executable with
        the --check option.

        Returns:
            Path to the created file, or None if the check by IRACE failed.
        """
        from sparkle.tools.parameters import PCSConvention
        solver_path = self.solver.directory.absolute()
        pcs_path = self.solver.get_pcs_file(port_type=PCSConvention.IRACE).absolute()
        with self.scenario_file_path.open("w") as file:
            file.write(
                f'execDir = "{self.directory.absolute()}"\n'
                'targetRunnerLauncher = "python3"\n'
                f'targetRunner = "{IRACE.configurator_target.absolute()}"\n'
                'targetCmdline = "{targetRunner} '
                f"{solver_path} {self.sparkle_objective} {self.cutoff_time} "
                '{configurationID} {instanceID} {seed} {instance} {targetRunnerArgs}"\n'
                f"deterministic = {1 if self.solver.deterministic else 0}\n"
                f'parameterFile = "{pcs_path.absolute()}"\n'
                f'trainInstancesDir = "{self.instance_set.directory.absolute()}"\n'
                f'trainInstancesFile = "{self.instance_file_path.absolute()}"\n'
                "debugLevel = 1\n"  # The verbosity level of IRACE
            )
            # Only one budget is written, solver calls take precedence
            if self.solver_calls is not None:
                file.write(f"maxExperiments = {self.solver_calls}\n")
            elif self.max_time is not None:
                file.write(f"maxTime = {self.max_time}\n")
            if self.solver_calls is not None and self.max_time is not None:
                print("WARNING: Both solver calls and max time specified for scenario. "
                      "This is not supported by IRACE, defaulting to solver calls.")
            elif self.solver_calls is None and self.max_time is None:
                print("WARNING: Neither solver calls nor max time specified. "
                      "Either budget is required for the IRACE scenario.")
            if self.max_time is not None:
                if (self.budget_estimation is None
                        and self.cutoff_time is not None
                        and self.cutoff_time < self.max_time):
                    # Auto Estimate
                    self.budget_estimation = self.cutoff_time / self.max_time
                # Write a given or estimated value, but never a literal "None"
                if self.budget_estimation is not None:
                    file.write(f"budgetEstimation = {self.budget_estimation}\n")
            if self.first_test is not None:
                file.write(f"firstTest = {self.first_test}\n")
            if self.mu is not None:
                file.write(f"mu = {self.mu}\n")
            if self.max_iterations is not None:
                file.write(f"nbIterations = {self.max_iterations}\n")
        # The file is closed here, so IRACE reads the complete scenario
        print("Verifying contents of IRACE scenario file and testing solver call...")
        check_file = subprocess.run(
            [f"{IRACE.configurator_executable.absolute()}",
             "-s", f"{self.scenario_file_path.absolute()}", "--check"],
            capture_output=True)
        if check_file.returncode != 0:
            # Lines starting with "#" are left out of the reported output
            stdout_msg = "\n".join([
                line for line in check_file.stdout.decode().splitlines()
                if not line.startswith("#")])
            print("An error occured in the IRACE scenario file:\n",
                  self.scenario_file_path.read_text(),
                  stdout_msg, "\n",
                  check_file.stderr.decode())
            return None
        print("IRACE scenario file is valid.")
        return self.scenario_file_path

    def serialize(self: IRACEScenario) -> dict:
        """Serialize the IRACE scenario."""
        return {
            "number_of_runs": self.number_of_runs,
            "solver_calls": self.solver_calls,
            "max_time": self.max_time,
            "cutoff_time": self.cutoff_time,
            "budget_estimation": self.budget_estimation,
            "first_test": self.first_test,
            "mu": self.mu,
            "max_iterations": self.max_iterations,
        }

    @staticmethod
    def from_file(scenario_file: Path) -> IRACEScenario:
        """Reads scenario file and initalises IRACEScenario.

        Args:
            scenario_file: Path to a scenario file as written by
                create_scenario_file.

        Returns:
            The IRACEScenario described by the file.
        """
        # Read within a context manager so the file handle is closed again
        with scenario_file.open() as file:
            scenario_dict = {keyvalue[0]: keyvalue[1]
                             for keyvalue in (line.split(" = ", maxsplit=1)
                                              for line in file
                                              if line.strip() != "")}
        # Solver, objective and cutoff time are stored in the target command line
        _, solver_path, objective, cutoff, _, _, _, _, _ =\
            scenario_dict.pop("targetCmdline").split(" ")
        scenario_dict["sparkle_objectives"] = [resolve_objective(objective)]
        scenario_dict["cutoff_time"] = int(cutoff)
        scenario_dict["parent_directory"] = scenario_file.parent.parent
        # Every entry in the results directory is counted as one run
        scenario_dict["number_of_runs"] =\
            sum(1 for _ in (scenario_file.parent / "results").iterdir())
        # Entries of the file that are not arguments of the constructor
        for unused_key in ("targetRunner", "execDir", "targetRunnerLauncher",
                           "deterministic", "parameterFile", "debugLevel"):
            scenario_dict.pop(unused_key)
        instance_set_path =\
            Path(scenario_dict.pop("trainInstancesDir").strip().strip('"'))
        instance_set = Instance_Set(instance_set_path)
        solver = Solver(Path(solver_path.strip()))
        scenario_dict.pop("trainInstancesFile")
        # Replace keys with scenario variable names
        renames = {"budgetEstimation": ("budget_estimation", float),
                   "firstTest": ("first_test", int),
                   "mu": ("mu", int),
                   "nbIterations": ("max_iterations", int),
                   "maxExperiments": ("solver_calls", int),
                   "maxTime": ("max_time", int)}
        for irace_key, (argument, cast) in renames.items():
            if irace_key in scenario_dict:
                scenario_dict[argument] = cast(scenario_dict.pop(irace_key))

        return IRACEScenario(solver, instance_set, **scenario_dict)