Coverage for sparkle/configurator/implementations/irace.py: 72%

199 statements  

« prev     ^ index     » next       coverage.py v7.9.1, created at 2025-07-01 13:21 +0000

1"""Configurator classes to implement IRACE in Sparkle.""" 

2from __future__ import annotations 

3import shutil 

4import subprocess 

5from pathlib import Path 

6 

7from sparkle.configurator.configurator import Configurator, ConfigurationScenario 

8from sparkle.solver import Solver 

9from sparkle.structures import PerformanceDataFrame, FeatureDataFrame 

10from sparkle.instance import InstanceSet, Instance_Set 

11from sparkle.types import SparkleObjective, resolve_objective 

12 

13from runrunner import Runner, Run 

14 

15 

class IRACE(Configurator):
    """Class for IRACE configurator."""

    # Directory (next to this module) that contains the IRACE target-runner script.
    configurator_path = Path(__file__).parent.resolve() / "IRACE"
    # Script that is written into the scenario file as IRACE's targetRunner.
    configurator_target = configurator_path / "irace_target_algorithm.py"

    full_name = "Iterated Racing for Automatic Algorithm Configuration"

    # Extracts the quoted payload from one printed R vector element,
    # e.g. `[1] "/some/path"` or `[1] ‘1.0’`, into the named group "data".
    r_regex = r'\[\d+\]\s*["‘](?P<data>[^"`]+)["’]'

    def __init__(self: IRACE) -> None:
        """Initialize IRACE configurator."""
        # Lazily filled by the `version` property.
        self._version: str = None
        super().__init__(multi_objective_support=False)

    @property
    def name(self: IRACE) -> str:
        """Returns the name of the configurator."""
        return IRACE.__name__

    @property
    def version(self: IRACE) -> str | None:
        """Returns the version of the configurator.

        The version is queried from R via `packageVersion('irace')` and cached
        on the instance once it has been determined successfully.

        Returns:
            The version string, or None if it could not be determined
            (Rscript is not available, the call failed, or the output
            could not be parsed).
        """
        if self._version is None:
            import re
            try:
                version_call = subprocess.run(
                    ["Rscript", "-e", "packageVersion('irace')"],
                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            except FileNotFoundError:
                # Rscript is not on the PATH; treat as "not installed"
                # instead of crashing a simple property access.
                return None
            if version_call.returncode == 0:
                r_data = re.search(IRACE.r_regex,
                                   version_call.stdout.decode().strip())
                if r_data is not None and r_data.group("data") is not None:
                    self._version = r_data.group("data")
        return self._version

    @staticmethod
    def configurator_executable() -> Path | None:
        """Returns the path to the IRACE executable.

        # NOTE: For the base class this is a class property.
        However as it must be calculated in this case, it is a class method as
        calculated class properties do not exist in Python.

        Returns:
            Path to the executable if it can be found, else None.
        """
        # Rscript is what is actually invoked below, so require it as well as R.
        if shutil.which("R") is None or shutil.which("Rscript") is None:
            return None  # Not installed
        r_call = subprocess.run(
            ["Rscript", "-e", "find.package('irace')"],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
        if r_call.returncode != 0:
            return None  # Not installed
        import re
        r_path = re.search(IRACE.r_regex,
                           r_call.stdout.decode().strip())
        if r_path is None or r_path.group("data") is None:
            return None  # Could not find IRACE?
        # The executable is located in the bin directory of the R package.
        return Path(r_path.group("data")) / "bin" / "irace"

    @staticmethod
    def scenario_class() -> ConfigurationScenario:
        """Returns the IRACE scenario class."""
        return IRACEScenario

    @staticmethod
    def check_requirements(verbose: bool = False) -> bool:
        """Check that IRACE is installed.

        Args:
            verbose: If True, emit a warning describing what is missing.

        Returns:
            True if both R and the IRACE executable were found, else False.
        """
        import warnings
        if shutil.which("R") is None:
            if verbose:
                warnings.warn(
                    "IRACE requires R, but R is not installed. "
                    "Please ensure R is installed.")
            return False
        if not IRACE.configurator_executable():
            if verbose:
                warnings.warn(
                    "IRACE executable not found. Please ensure IRACE is installed "
                    f"in the expected Path ({IRACE.configurator_path}).")
            return False
        return True

    @staticmethod
    def download_requirements() -> None:
        """Download IRACE.

        Installs the irace R package (with dependencies) into the user's
        personal R library. The installation output is printed, and a warning
        is raised if the installation returns a non-zero exit code.

        Raises:
            RuntimeError: If R is not installed.
        """
        if shutil.which("R") is None:
            raise RuntimeError("IRACE requires R, but R is not installed.")
        # Ensure personal library exists, do not raise warnings
        subprocess.run([
            "Rscript", "-e",
            "dir.create(path = Sys.getenv('R_LIBS_USER'), "
            "showWarnings = FALSE, recursive = TRUE)"],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        install_irace = subprocess.run(
            ["Rscript", "-e",
             # Install the irace package
             "install.packages('irace', "
             "lib=Sys.getenv('R_LIBS_USER'), "  # Install in user library
             "dependencies = TRUE, "  # Ensure dependencies are installed
             "repos='https://cloud.r-project.org')"],  # Set source
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        install_stdout = install_irace.stdout.decode()
        install_stderr = install_irace.stderr.decode()
        print(f"{install_stdout}\n\n"
              f"{install_stderr}")
        if install_irace.returncode != 0:
            import warnings
            warnings.warn("IRACE had a non-zero return code during installation!\n\n"
                          f"{install_stdout}\n\n"
                          f"{install_stderr}")

    def configure(self: IRACE,
                  scenario: ConfigurationScenario,
                  data_target: PerformanceDataFrame,
                  validate_after: bool = True,
                  sbatch_options: list[str] = None,
                  slurm_prepend: str | list[str] | Path = None,
                  num_parallel_jobs: int = None,
                  base_dir: Path = None,
                  run_on: Runner = Runner.SLURM) -> Run:
        """Start configuration job.

        Args:
            scenario: ConfigurationScenario to execute.
            data_target: PerformanceDataFrame where to store the found configurations
            validate_after: Whether to validate the configuration on the training set
                afterwards or not.
            sbatch_options: List of slurm batch options to use.
                Defaults to an empty list.
            slurm_prepend: Slurm script to prepend to the sbatch
            num_parallel_jobs: The maximum number of jobs to run in parallel
            base_dir: The base_dir of RunRunner where the sbatch scripts will be placed
            run_on: On which platform to run the jobs. Default: Slurm.

        Returns:
            A RunRunner Run object.

        Raises:
            RuntimeError: If the IRACE executable cannot be found.
        """
        # A fresh list per call avoids sharing one mutable default between calls.
        if sbatch_options is None:
            sbatch_options = []
        scenario.create_scenario()
        # Resolve the executable once: every lookup spawns an Rscript process.
        executable = IRACE.configurator_executable()
        if executable is None:
            raise RuntimeError(
                "IRACE executable not found. Please ensure R and IRACE are installed.")
        executable_path = executable.absolute()
        configuration_ids = scenario.configuration_ids
        # Create command to call IRACE. Create plural based on number of runs
        # TODO: Setting seeds like this is weird and should be inspected.
        seeds = list(range(scenario.number_of_runs))
        output_files = [
            scenario.results_directory.absolute() / f"output_{job_idx}.Rdata"
            for job_idx in configuration_ids]
        # One command per (seed, configuration id, output file) triple;
        # zip stops at the shortest of the three sequences.
        cmds = [f"python3 {Configurator.configurator_cli_path.absolute()} "
                f"{IRACE.__name__} {output_path} {data_target.csv_filepath} "
                f"{scenario.scenario_file_path} {configuration_id} "
                f"{executable_path} "
                f"--scenario {scenario.scenario_file_path} "
                f"--log-file {output_path} "
                f"--seed {seed}" for seed, configuration_id, output_path
                in zip(seeds, configuration_ids, output_files)]
        return super().configure(
            configuration_commands=cmds,
            data_target=data_target,
            output=output_files,
            scenario=scenario,
            configuration_ids=configuration_ids,
            sbatch_options=sbatch_options,
            slurm_prepend=slurm_prepend,
            validate_after=validate_after,
            num_parallel_jobs=num_parallel_jobs,
            base_dir=base_dir,
            run_on=run_on
        )

    @staticmethod
    def organise_output(output_source: Path,
                        output_target: Path,
                        scenario: IRACEScenario,
                        configuration_id: str) -> None | dict:
        """Method to restructure and clean up after a single configurator call.

        Loads the IRACE result file in R, prints the configuration of the last
        iteration elite and parses the printed table into a configuration.

        Args:
            output_source: Path to the IRACE .Rdata result file.
            output_target: Target passed on to Configurator.save_configuration.
            scenario: The scenario the configuration belongs to.
            configuration_id: The id under which the configuration is saved.

        Returns:
            The result of Configurator.save_configuration.

        Raises:
            RuntimeError: If R fails or prints no configuration table.
        """
        get_config = subprocess.run(
            ["Rscript", "-e",
             'library("irace"); '
             f'load("{output_source}"); '
             "last <- length(iraceResults$iterationElites); "
             "id <- iraceResults$iterationElites[last]; "
             "print(getConfigurationById(iraceResults, ids = id))"],
            capture_output=True)
        r_table = get_config.stdout.decode()
        if get_config.returncode != 0 or r_table.strip() == "":
            raise RuntimeError("Failed to get configuration from IRACE file "
                               f"{output_source}:\n"
                               f"{get_config.stdout.decode()}\n"
                               f"{get_config.stderr.decode()}")

        # The printed table alternates header and value lines.
        # Join all header lines and all value lines together.
        header_parts = []
        content_parts = []
        for i, line in enumerate(r_table.splitlines()):
            if i % 2 == 0:  # Even lines are headers
                header_parts.append(line)
            else:  # Odd lines are parameter values
                # First element is the ID
                content_parts.append(" ".join(line.split(" ")[1:]))
        # First header item is the ID
        header = [x for x in "".join(header_parts).split(" ") if x != ""][1:]
        content = [x for x in "".join(content_parts).split(" ") if x != ""][1:]
        # Skip the parent column and parameters that have no value (NA).
        configuration = "".join(
            f"--{parameter} {value} "
            for parameter, value in zip(header, content)
            if not parameter == ".PARENT." and value != "NA" and value != "<NA>")
        configuration = Solver.config_str_to_dict(configuration)
        return Configurator.save_configuration(scenario, configuration_id,
                                               configuration, output_target)

    def get_status_from_logs(self: IRACE) -> None:
        """Method to scan the log files of the configurator for warnings.

        Raises:
            NotImplementedError: Always, this is not implemented for IRACE.
        """
        raise NotImplementedError

226 

227 

class IRACEScenario(ConfigurationScenario):
    """Class for IRACE scenario."""

    def __init__(self: IRACEScenario,
                 solver: Solver,
                 instance_set: InstanceSet,
                 sparkle_objectives: list[SparkleObjective],
                 number_of_runs: int,
                 parent_directory: Path,
                 solver_calls: int = None,
                 solver_cutoff_time: int = None,
                 max_time: int = None,
                 budget_estimation: float = None,
                 first_test: int = None,
                 mu: int = None,
                 max_iterations: int = None,
                 feature_data: FeatureDataFrame = None,
                 ) -> None:
        """Initialize scenario paths and names.

        Args:
            solver: Solver that should be configured.
            instance_set: Instances object for the scenario.
            sparkle_objectives: SparkleObjectives used for each run of the configuration.
                Will be simplified to the first objective.
            number_of_runs: The number of configurator runs to perform
                for configuring the solver.
            parent_directory: Path where the scenario files will be placed.
            solver_calls: The number of times the solver is called for each
                configuration run. [MaxExperiments]
            solver_cutoff_time: The maximum time allowed for each individual run during
                configuration.
            max_time: The time budget (CPU) allocated for the sum of solver calls
                done by the configurator in seconds. [MaxTime]
            budget_estimation: Fraction (smaller than 1) of the budget used to estimate
                the mean computation time of a configuration. Only used when maxTime > 0.
                Default: Computed as cutoff_time / max_time. [BudgetEstimation]
            first_test: Specifies how many instances are evaluated before the first
                elimination test. IRACE Default: 5. [firstTest]
            mu: Parameter used to define the number of configurations sampled and
                evaluated at each iteration. IRACE Default: 5. [mu]
            max_iterations: Maximum number of iterations to be executed. Each iteration
                involves the generation of new configurations and the use of racing to
                select the best configurations. By default (with 0), irace calculates a
                minimum number of iterations as N^iter = ⌊2 + log2 N param⌋, where
                N^param is the number of non-fixed parameters to be tuned.
                Setting this parameter may make irace stop sooner than it should without
                using all the available budget. We recommend to use the default value.
            feature_data: FeatureDataFrame object with the feature data.
                Currently not supported by IRACE.
        """
        # Other possible arguments that are not added yet to Sparkle:
        # --test-num-elites Number of elite configurations returned by irace that
        #     will be tested if test instances are provided.
        #     Default: 1.
        # --test-iteration-elites Enable/disable testing the elite configurations
        #     found at each iteration. Default: 0.
        # --test-type Statistical test used for elimination. The default
        #     value selects t-test if capping is enabled or F-test,
        #     otherwise. Valid values are: F-test (Friedman test),
        #     t-test (pairwise t-tests with no correction),
        #     t-test-bonferroni (t-test with Bonferroni's correction
        #     for multiple comparisons), t-test-holm (t-test with
        #     Holm's correction for multiple comparisons).
        # --each-test Number of instances evaluated between elimination
        #     tests. Default: 1.
        # --load-balancing Enable/disable load-balancing when executing
        #     experiments in parallel. Load-balancing makes better
        #     use of computing resources, but increases
        #     communication overhead. If this overhead is large,
        #     disabling load-balancing may be faster. Default: 1.
        # --mpi Enable/disable MPI. Use Rmpi to execute targetRunner
        #     in parallel (parameter parallel is the number of
        #     slaves). Default: 0.
        # --batchmode Specify how irace waits for jobs to finish when
        #     targetRunner submits jobs to a batch cluster: sge,
        #     pbs, torque, slurm or htcondor. targetRunner must
        #     submit jobs to the cluster using, for example, qsub.
        #     Default: 0.
        # --digits Maximum number of decimal places that are significant
        #     for numerical (real) parameters. Default: 4.
        # --soft-restart Enable/disable the soft restart strategy that avoids
        #     premature convergence of the probabilistic model.
        #     Default: 1.
        # --soft-restart-threshold Soft restart threshold value for numerical
        #     parameters. If NA, NULL or "", it is computed as
        #     10^-digits.
        # -e,--elitist Enable/disable elitist irace. Default: 1.
        # --elitist-new-instances Number of instances added to the execution list
        #     before previous instances in elitist irace. Default:
        #     1.
        # --elitist-limit In elitist irace, maximum number per race of
        #     elimination tests that do not eliminate a
        #     configuration. Use 0 for no limit. Default: 2.
        # --capping Enable the use of adaptive capping, a technique
        #     designed for minimizing the computation time of
        #     configurations. This is only available when elitist is
        #     active. Default: 0.
        # --capping-type Measure used to obtain the execution bound from the
        #     performance of the elite configurations: median, mean,
        #     worst, best. Default: median.
        # --bound-type Method to calculate the mean performance of elite
        #     configurations: candidate or instance. Default:
        #     candidate.
        # --bound-max Maximum execution bound for targetRunner. It must be
        #     specified when capping is enabled. Default: 0.
        # --bound-digits Precision used for calculating the execution time. It
        #     must be specified when capping is enabled. Default: 0.
        # --bound-par Penalization constant for timed out executions
        #     (executions that reach boundMax execution time).
        #     Default: 1.
        # --bound-as-timeout Replace the configuration cost of bounded executions
        #     with boundMax. Default: 1.
        # --postselection Percentage of the configuration budget used to perform
        #     a postselection race of the best configurations of
        #     each iteration after the execution of irace. Default:
        #     0.
        # --iterations Maximum number of iterations. Default: 0.
        # --experiments-per-iteration Number of runs of the target algorithm per
        #     iteration. Default: 0.
        # --min-survival Minimum number of configurations needed to continue
        #     the execution of each race (iteration). Default: 0.
        # --num-configurations Number of configurations to be sampled and evaluated
        #     at each iteration. Default: 0.
        # --confidence Confidence level for the elimination test. Default:
        #     0.95.
        super().__init__(solver, instance_set, sparkle_objectives,
                         number_of_runs, parent_directory)
        self.solver = solver
        self.instance_set = instance_set
        # IRACE is single objective: only the first objective is used.
        if sparkle_objectives is not None:
            self.sparkle_objective = sparkle_objectives[0]
        else:
            self.sparkle_objective = None

        if feature_data is not None:
            print("WARNING: Instance features currently not supported by IRACE.")

        # Non-positive (or missing) budgets are treated as "not specified".
        self.solver_calls = solver_calls if solver_calls and solver_calls > 0 else None
        self.max_time = max_time if max_time and max_time > 0 else None
        self.solver_cutoff_time = solver_cutoff_time
        self.budget_estimation = budget_estimation
        self.first_test = first_test
        self.mu = mu
        self.max_iterations = max_iterations

        # Pathing
        self.instance_file_path = self.directory / f"{self.instance_set.name}.txt"
        self.validation = self.directory / "validation"
        self.results_directory = self.directory / "results"

    @property
    def configurator(self: IRACEScenario) -> IRACE:
        """Return the type of configurator the scenario belongs to."""
        return IRACE

    def create_scenario(self: IRACEScenario) -> None:
        """Create scenario with solver and instances in the scenario directory.

        This prepares all the necessary subdirectories related to configuration,
        writes the instance list file and creates the IRACE scenario file.
        Removes any existing directory if it overlaps with the scenario directory.
        """
        # Set up directories
        shutil.rmtree(self.directory, ignore_errors=True)  # Clear directory
        self.directory.mkdir(exist_ok=True, parents=True)
        self.tmp.mkdir(exist_ok=True)
        self.validation.mkdir(exist_ok=True)
        self.results_directory.mkdir(exist_ok=True)

        # One instance (file name only) per line; the instance directory is
        # passed to IRACE separately in the scenario file.
        with self.instance_file_path.open("w+") as file:
            for instance_path in self.instance_set._instance_paths:
                file.write(f"{instance_path.name}\n")
        self.create_scenario_file()

    def create_scenario_file(self: IRACEScenario) -> Path | None:
        """Create a file from the IRACE scenario.

        After writing, the file is verified by calling the IRACE executable
        with the `--check` option.

        Returns:
            Path to the created file, or None if the IRACE check failed.

        Raises:
            RuntimeError: If the IRACE executable cannot be found.
        """
        super().create_scenario_file()
        from sparkle.tools.parameters import PCSConvention
        solver_path = self.solver.directory.absolute()
        pcs_path = self.solver.get_pcs_file(port_type=PCSConvention.IRACE).absolute()
        with self.scenario_file_path.open("w") as file:
            file.write(
                f'execDir = "{self.directory.absolute()}"\n'
                'targetRunnerLauncher = "python3"\n'
                f'targetRunner = "{IRACE.configurator_target.absolute()}"\n'
                'targetCmdline = "{targetRunner} '
                f"{solver_path} {self.sparkle_objective} {self.solver_cutoff_time} "
                '{configurationID} {instanceID} {seed} {instance} {targetRunnerArgs}"\n'
                f"deterministic = {1 if self.solver.deterministic else 0}\n"
                f'parameterFile = "{pcs_path.absolute()}"\n'
                f'trainInstancesDir = "{self.instance_set.directory.absolute()}"\n'
                f'trainInstancesFile = "{self.instance_file_path.absolute()}"\n'
                "debugLevel = 1\n"  # The verbosity level of IRACE
            )
            # Only one budget type is written; solver calls take precedence.
            if self.solver_calls is not None:
                file.write(f"maxExperiments = {self.solver_calls}\n")
            elif self.max_time is not None:
                file.write(f"maxTime = {self.max_time}\n")
            if self.solver_calls is not None and self.max_time is not None:
                print("WARNING: Both solver calls and max time specified for scenario. "
                      "This is not supported by IRACE, defaulting to solver calls.")
            elif self.solver_calls is None and self.max_time is None:
                print("WARNING: Neither solver calls nor max time specified. "
                      "Either budget is required for the IRACE scenario.")
            if self.max_time is not None:
                # Auto estimate only when no value was given and the cutoff
                # time is known and smaller than the total time budget.
                if (self.budget_estimation is None
                        and self.solver_cutoff_time is not None
                        and self.solver_cutoff_time < self.max_time):
                    self.budget_estimation = self.solver_cutoff_time / self.max_time
                # Also write a user supplied value, so it is not silently dropped.
                if self.budget_estimation is not None:
                    file.write(f"budgetEstimation = {self.budget_estimation}\n")
            if self.first_test is not None:
                file.write(f"firstTest = {self.first_test}\n")
            if self.mu is not None:
                file.write(f"mu = {self.mu}\n")
            if self.max_iterations is not None:
                file.write(f"nbIterations = {self.max_iterations}\n")
        print("Verifying contents of IRACE scenario file and testing solver call...")
        executable = IRACE.configurator_executable()
        if executable is None:
            raise RuntimeError(
                "IRACE executable not found. Please ensure R and IRACE are installed.")
        check_file = subprocess.run(
            [f"{executable.absolute()}",
             "-s", f"{self.scenario_file_path.absolute()}", "--check"],
            capture_output=True)
        if check_file.returncode != 0:
            # Lines starting with '#' are dropped from the reported output.
            stdout_msg = "\n".join([
                line for line in check_file.stdout.decode().splitlines()
                if not line.startswith("#")])
            print("An error occured in the IRACE scenario file:\n",
                  self.scenario_file_path.read_text(),
                  stdout_msg, "\n",
                  check_file.stderr.decode())
            return None
        print("IRACE scenario file is valid.")
        return self.scenario_file_path

    def serialise(self: IRACEScenario) -> dict:
        """Serialize the IRACE scenario.

        Returns:
            Dictionary with the budget and IRACE specific settings of the scenario.
        """
        return {
            "number_of_runs": self.number_of_runs,
            "solver_calls": self.solver_calls,
            "max_time": self.max_time,
            "solver_cutoff_time": self.solver_cutoff_time,
            "budget_estimation": self.budget_estimation,
            "first_test": self.first_test,
            "mu": self.mu,
            "max_iterations": self.max_iterations,
        }

    @staticmethod
    def from_file(scenario_file: Path) -> IRACEScenario:
        """Reads scenario file and initalises IRACEScenario.

        Args:
            scenario_file: Path to an IRACE scenario file with `key = value` lines,
                as written by create_scenario_file.

        Returns:
            The IRACEScenario described by the file.
        """
        # Values keep their surrounding quotes and trailing newline here;
        # they are stripped or converted where they are used below.
        with scenario_file.open() as file:
            scenario_dict = {keyvalue[0]: keyvalue[1]
                             for keyvalue in (line.split(" = ", maxsplit=1)
                                              for line in file
                                              if line.strip() != "")}
        # targetCmdline consists of nine space separated fields, of which the
        # second to fourth hold the solver path, objective and cutoff time.
        _, solver_path, objective, cutoff, _, _, _, _, _ =\
            scenario_dict.pop("targetCmdline").split(" ")
        scenario_dict["sparkle_objectives"] = [resolve_objective(objective)]
        scenario_dict["solver_cutoff_time"] = int(cutoff)
        scenario_dict["parent_directory"] = scenario_file.parent.parent
        # The number of runs is taken from the number of entries in the
        # results directory next to the scenario file.
        scenario_dict["number_of_runs"] =\
            sum(1 for _ in (scenario_file.parent / "results").iterdir())
        # Remove keys that are not constructor arguments
        scenario_dict.pop("targetRunner")
        scenario_dict.pop("execDir")
        scenario_dict.pop("targetRunnerLauncher")
        scenario_dict.pop("deterministic")
        scenario_dict.pop("parameterFile")
        scenario_dict.pop("debugLevel")
        instance_set_path =\
            Path(scenario_dict.pop("trainInstancesDir").strip().strip('"'))
        instance_set = Instance_Set(instance_set_path)
        solver = Solver(Path(solver_path.strip()))
        scenario_dict.pop("trainInstancesFile")
        # Replace keys with scenario variable names
        if "budgetEstimation" in scenario_dict:
            scenario_dict["budget_estimation"] =\
                float(scenario_dict.pop("budgetEstimation"))
        if "firstTest" in scenario_dict:
            scenario_dict["first_test"] = int(scenario_dict.pop("firstTest"))
        if "mu" in scenario_dict:
            scenario_dict["mu"] = int(scenario_dict.pop("mu"))
        if "nbIterations" in scenario_dict:
            scenario_dict["max_iterations"] = int(scenario_dict.pop("nbIterations"))
        if "maxExperiments" in scenario_dict:
            scenario_dict["solver_calls"] = int(scenario_dict.pop("maxExperiments"))
        if "maxTime" in scenario_dict:
            scenario_dict["max_time"] = int(scenario_dict.pop("maxTime"))

        return IRACEScenario(solver, instance_set, **scenario_dict)