Coverage for sparkle/configurator/implementations/irace.py: 72%

201 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-09-29 10:17 +0000

1"""Configurator classes to implement IRACE in Sparkle.""" 

2 

3from __future__ import annotations 

4import shutil 

5import subprocess 

6import random 

7from pathlib import Path 

8 

9from sparkle.configurator.configurator import Configurator, ConfigurationScenario 

10from sparkle.solver import Solver 

11from sparkle.structures import PerformanceDataFrame, FeatureDataFrame 

12from sparkle.instance import InstanceSet, Instance_Set 

13from sparkle.types import SparkleObjective, resolve_objective 

14 

15from runrunner import Runner, Run 

16 

17 

class IRACE(Configurator):
    """Class for IRACE configurator.

    Wraps the R package ``irace``: locating and installing it through
    ``Rscript``, building the command lines for configuration runs and
    converting the resulting ``.Rdata`` logs into Sparkle configurations.
    """

    configurator_path = Path(__file__).parent.resolve() / "IRACE"
    configurator_target = configurator_path / "irace_target_algorithm.py"

    full_name = "Iterated Racing for Automatic Algorithm Configuration"

    # Matches one printed R vector element: an index such as ``[1]``, optional
    # whitespace, an opening ``"`` or ``‘``, the payload (named group ``data``)
    # and a closing ``"`` or ``’``.
    r_regex = r'\[\d+\]\s*["‘](?P<data>[^"`]+)["’]'

    def __init__(self: IRACE) -> None:
        """Initialize IRACE configurator."""
        # Filled lazily by the ``version`` property.
        self._version: str | None = None
        super().__init__(multi_objective_support=False)

    @property
    def name(self: IRACE) -> str:
        """Returns the name of the configurator."""
        return IRACE.__name__

    @property
    def version(self: IRACE) -> str:
        """Returns the version of the configurator.

        The version is queried from R on first access and cached afterwards.

        Returns:
            The version string, or None if it could not be determined
            (Rscript missing, non-zero return code or unparsable output).
        """
        if self._version is None:
            import re

            try:
                version_call = subprocess.run(
                    ["Rscript", "-e", "packageVersion('irace')"],
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                )
            except FileNotFoundError:
                # Rscript is not on the PATH, so no version can be reported.
                return None
            if version_call.returncode == 0:
                r_data = re.search(IRACE.r_regex, version_call.stdout.decode().strip())
                if r_data is not None and r_data.group("data") is not None:
                    self._version = r_data.group("data")
        return self._version

    @staticmethod
    def configurator_executable() -> Path:
        """Returns the path to the IRACE executable.

        # NOTE: For the base class this is a class property.
        However as it must be calculated in this case, it is a class method as calculated
        class properties do not exist in Python.

        Returns:
            Path to the executable if it can be found, else None.
        """
        if shutil.which("R") is None:
            return None  # Not installed
        try:
            r_call = subprocess.run(
                ["Rscript", "-e", "find.package('irace')"],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )
        except FileNotFoundError:
            return None  # R is present but Rscript cannot be executed
        if r_call.returncode != 0:
            return None  # Not installed
        import re

        r_path = re.search(IRACE.r_regex, r_call.stdout.decode().strip())
        if r_path is None or r_path.group("data") is None:
            return None  # Could not find IRACE?
        return Path(r_path.group("data")) / "bin" / "irace"

    @staticmethod
    def scenario_class() -> type[ConfigurationScenario]:
        """Returns the IRACE scenario class."""
        return IRACEScenario

    @staticmethod
    def check_requirements(verbose: bool = False) -> bool:
        """Check that IRACE is installed.

        Args:
            verbose: If True, emit a warning describing what is missing.

        Returns:
            True if both R and the IRACE executable path could be resolved.
        """
        import warnings

        if shutil.which("R") is None:
            if verbose:
                warnings.warn(
                    "IRACE requires R, but R is not installed. "
                    "Please ensure R is installed."
                )
            return False
        if not IRACE.configurator_executable():
            if verbose:
                warnings.warn(
                    "IRACE executable not found. Please ensure IRACE is installed "
                    f"in the expected Path ({IRACE.configurator_path})."
                )
            return False
        return True

    @staticmethod
    def download_requirements() -> None:
        """Download IRACE.

        Installs the ``irace`` R package into the user library and prints the
        installer output. A warning is emitted on a non-zero return code.

        Raises:
            RuntimeError: If R is not installed.
        """
        if shutil.which("R") is None:
            raise RuntimeError("IRACE requires R, but R is not installed.")
        # Ensure personal library exists, do not raise warnings
        subprocess.run(
            [
                "Rscript",
                "-e",
                "dir.create(path = Sys.getenv('R_LIBS_USER'), "
                "showWarnings = FALSE, recursive = TRUE)",
            ],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        install_irace = subprocess.run(
            [
                "Rscript",
                "-e",
                # Install the irace package
                "install.packages('irace', "
                "lib=Sys.getenv('R_LIBS_USER'), "  # Install in user library
                "dependencies = TRUE, "  # Ensure dependencies are installed
                "repos='https://cloud.r-project.org')",  # Set source
            ],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        install_stdout = install_irace.stdout.decode()
        install_stderr = install_irace.stderr.decode()
        print(f"{install_stdout}\n\n{install_stderr}")
        if install_irace.returncode != 0:
            import warnings

            warnings.warn(
                "IRACE had a non-zero return code during installation!\n\n"
                f"{install_stdout}\n\n"
                f"{install_stderr}"
            )

    def configure(
        self: IRACE,
        scenario: ConfigurationScenario,
        data_target: PerformanceDataFrame,
        validate_after: bool = True,
        sbatch_options: list[str] = None,
        slurm_prepend: str | list[str] | Path = None,
        num_parallel_jobs: int = None,
        base_dir: Path = None,
        run_on: Runner = Runner.SLURM,
    ) -> Run:
        """Start configuration job.

        Args:
            scenario: ConfigurationScenario to execute.
            data_target: PerformanceDataFrame where to store the found configurations
            validate_after: Whether to validate the configuration on the training set
                afterwards or not.
            sbatch_options: List of slurm batch options to use.
                Defaults to an empty list.
            slurm_prepend: Slurm script to prepend to the sbatch
            num_parallel_jobs: The maximum number of jobs to run in parallel
            base_dir: The base_dir of RunRunner where the sbatch scripts will be placed
            run_on: On which platform to run the jobs. Default: Slurm.

        Returns:
            A RunRunner Run object.

        Raises:
            RuntimeError: If the IRACE executable cannot be located.
        """
        # Resolve the executable once, before any scenario files are written:
        # each lookup spawns an Rscript process, and a missing installation
        # should produce a clear error rather than an AttributeError.
        executable = IRACE.configurator_executable()
        if executable is None:
            raise RuntimeError(
                "IRACE executable not found. Please ensure R and the irace "
                "package are installed."
            )
        executable = executable.absolute()
        # Avoid sharing one mutable default list between calls
        if sbatch_options is None:
            sbatch_options = []

        scenario.create_scenario()
        configuration_ids = scenario.configuration_ids
        # Create command to call IRACE. Create plural based on number of runs
        seeds = [random.randint(0, 2**32 - 1) for _ in range(scenario.number_of_runs)]
        output_files = [
            scenario.results_directory.absolute() / f"output_{job_idx}.Rdata"
            for job_idx in configuration_ids
        ]
        cmds = [
            f"python3 {Configurator.configurator_cli_path.absolute()} "
            f"{IRACE.__name__} {output_path} {data_target.csv_filepath} "
            f"{scenario.scenario_file_path} {configuration_id} "
            f"{executable} "
            f"--scenario {scenario.scenario_file_path} "
            f"--log-file {output_path} "
            f"--seed {seed}"
            for seed, configuration_id, output_path in zip(
                seeds, configuration_ids, output_files
            )
        ]
        return super().configure(
            configuration_commands=cmds,
            data_target=data_target,
            output=output_files,
            scenario=scenario,
            configuration_ids=configuration_ids,
            sbatch_options=sbatch_options,
            slurm_prepend=slurm_prepend,
            validate_after=validate_after,
            num_parallel_jobs=num_parallel_jobs,
            base_dir=base_dir,
            run_on=run_on,
        )

    @staticmethod
    def organise_output(
        output_source: Path,
        output_target: Path,
        scenario: IRACEScenario,
        configuration_id: str,
    ) -> None | dict:
        """Method to restructure and clean up after a single configurator call.

        Loads the IRACE log through R, prints the configuration of the last
        iteration elite and converts the printed table into a configuration
        dictionary that is stored via ``Configurator.save_configuration``.

        Args:
            output_source: Path to the ``.Rdata`` file written by IRACE.
            output_target: Target passed on to ``Configurator.save_configuration``.
            scenario: The scenario the configuration belongs to.
            configuration_id: Identifier under which the configuration is saved.

        Returns:
            Whatever ``Configurator.save_configuration`` returns.

        Raises:
            RuntimeError: If R fails or prints nothing.
        """
        get_config = subprocess.run(
            [
                "Rscript",
                "-e",
                'library("irace"); '
                f'load("{output_source}"); '
                "last <- length(iraceResults$iterationElites); "
                "id <- iraceResults$iterationElites[last]; "
                "print(getConfigurationById(iraceResults, ids = id))",
            ],
            capture_output=True,
        )
        r_table = get_config.stdout.decode()
        if get_config.returncode != 0 or r_table.strip() == "":
            raise RuntimeError(
                "Failed to get configuration from IRACE file "
                f"{output_source}:\n"
                f"{r_table}\n"
                f"{get_config.stderr.decode()}"
            )

        # The printed table alternates header lines and value lines; join all
        # header parts and all value parts back together.
        header_parts = []
        content_parts = []
        for index, line in enumerate(r_table.splitlines()):
            if index % 2 == 0:  # Even lines are headers
                header_parts.append(line)
            else:  # Odd lines are parameter values
                # First element is the ID
                content_parts.append(" ".join(line.split(" ")[1:]))
        # First header item is the ID
        parameters = [x for x in "".join(header_parts).split(" ") if x != ""][1:]
        values = [x for x in "".join(content_parts).split(" ") if x != ""][1:]
        # Skip the parent column and parameters without a value
        configuration = "".join(
            f"--{parameter} {value} "
            for parameter, value in zip(parameters, values)
            if parameter != ".PARENT." and value not in ("NA", "<NA>")
        )
        configuration = Solver.config_str_to_dict(configuration)
        return Configurator.save_configuration(
            scenario, configuration_id, configuration, output_target
        )

    def get_status_from_logs(self: IRACE) -> None:
        """Method to scan the log files of the configurator for warnings.

        Raises:
            NotImplementedError: Always; not implemented for IRACE.
        """
        raise NotImplementedError

263 

264 

class IRACEScenario(ConfigurationScenario):
    """Class for IRACE scenario.

    Holds the IRACE-specific budget and tuning options and can write them to,
    and read them back from, an IRACE scenario file.
    """

    def __init__(
        self: IRACEScenario,
        solver: Solver,
        instance_set: InstanceSet,
        sparkle_objectives: list[SparkleObjective],
        number_of_runs: int,
        parent_directory: Path,
        solver_calls: int = None,
        solver_cutoff_time: int = None,
        max_time: int = None,
        budget_estimation: float = None,
        first_test: int = None,
        mu: int = None,
        max_iterations: int = None,
        feature_data: FeatureDataFrame = None,
        timestamp: str = None,
    ) -> None:
        """Initialize scenario paths and names.

        Args:
            solver: Solver that should be configured.
            instance_set: Instances object for the scenario.
            sparkle_objectives: SparkleObjectives used for each run of the configuration.
                Will be simplified to the first objective.
            number_of_runs: The number of configurator runs to perform
                for configuring the solver.
            parent_directory: Path where the scenario files will be placed.
            solver_calls: The number of times the solver is called for each
                configuration run. [MaxExperiments]
            solver_cutoff_time: The maximum time allowed for each individual run during
                configuration.
            max_time: The time budget (CPU) allocated for the sum of solver calls
                done by the configurator in seconds. [MaxTime]
            budget_estimation: Fraction (smaller than 1) of the budget used to estimate
                the mean computation time of a configuration. Only used when maxTime > 0.
                Default: Computed as cutoff_time / max_time. [BudgetEstimation]
            first_test: Specifies how many instances are evaluated before the first
                elimination test. IRACE Default: 5. [firstTest]
            mu: Parameter used to define the number of configurations sampled and
                evaluated at each iteration. IRACE Default: 5. [mu]
            max_iterations: Maximum number of iterations to be executed. Each iteration
                involves the generation of new configurations and the use of racing to
                select the best configurations. By default (with 0), irace calculates a
                minimum number of iterations as N^iter = ⌊2 + log2 N param⌋, where
                N^param is the number of non-fixed parameters to be tuned.
                Setting this parameter may make irace stop sooner than it should without
                using all the available budget. We recommend to use the default value.
            feature_data: FeatureDataFrame object with the feature data.
                Currently not supported by IRACE.
            timestamp: An optional timestamp for the directory name.
        """
        # Other possible arguments that are not added yet to Sparkle:
        # --test-num-elites          Number of elite configurations returned by irace that
        #                            will be tested if test instances are provided.
        #                            Default: 1.
        # --test-iteration-elites    Enable/disable testing the elite configurations
        #                            found at each iteration. Default: 0.
        # --test-type                Statistical test used for elimination. The default
        #                            value selects t-test if capping is enabled or F-test,
        #                            otherwise. Valid values are: F-test (Friedman test),
        #                            t-test (pairwise t-tests with no correction),
        #                            t-test-bonferroni (t-test with Bonferroni's correction
        #                            for multiple comparisons), t-test-holm (t-test with
        #                            Holm's correction for multiple comparisons).
        # --each-test                Number of instances evaluated between elimination
        #                            tests. Default: 1.
        # --load-balancing           Enable/disable load-balancing when executing
        #                            experiments in parallel. Load-balancing makes better
        #                            use of computing resources, but increases
        #                            communication overhead. If this overhead is large,
        #                            disabling load-balancing may be faster. Default: 1.
        # --mpi                      Enable/disable MPI. Use Rmpi to execute targetRunner
        #                            in parallel (parameter parallel is the number of
        #                            slaves). Default: 0.
        # --batchmode                Specify how irace waits for jobs to finish when
        #                            targetRunner submits jobs to a batch cluster: sge,
        #                            pbs, torque, slurm or htcondor. targetRunner must
        #                            submit jobs to the cluster using, for example, qsub.
        #                            Default: 0.
        # --digits                   Maximum number of decimal places that are significant
        #                            for numerical (real) parameters. Default: 4.
        # --soft-restart             Enable/disable the soft restart strategy that avoids
        #                            premature convergence of the probabilistic model.
        #                            Default: 1.
        # --soft-restart-threshold   Soft restart threshold value for numerical
        #                            parameters. If NA, NULL or "", it is computed as
        #                            10^-digits.
        # -e,--elitist               Enable/disable elitist irace. Default: 1.
        # --elitist-new-instances    Number of instances added to the execution list
        #                            before previous instances in elitist irace. Default:
        #                            1.
        # --elitist-limit            In elitist irace, maximum number per race of
        #                            elimination tests that do not eliminate a
        #                            configuration. Use 0 for no limit. Default: 2.
        # --capping                  Enable the use of adaptive capping, a technique
        #                            designed for minimizing the computation time of
        #                            configurations. This is only available when elitist is
        #                            active. Default: 0.
        # --capping-type             Measure used to obtain the execution bound from the
        #                            performance of the elite configurations: median, mean,
        #                            worst, best. Default: median.
        # --bound-type               Method to calculate the mean performance of elite
        #                            configurations: candidate or instance. Default:
        #                            candidate.
        # --bound-max                Maximum execution bound for targetRunner. It must be
        #                            specified when capping is enabled. Default: 0.
        # --bound-digits             Precision used for calculating the execution time. It
        #                            must be specified when capping is enabled. Default: 0.
        # --bound-par                Penalization constant for timed out executions
        #                            (executions that reach boundMax execution time).
        #                            Default: 1.
        # --bound-as-timeout         Replace the configuration cost of bounded executions
        #                            with boundMax. Default: 1.
        # --postselection            Percentage of the configuration budget used to perform
        #                            a postselection race of the best configurations of
        #                            each iteration after the execution of irace. Default:
        #                            0.
        # --iterations               Maximum number of iterations. Default: 0.
        # --experiments-per-iteration  Number of runs of the target algorithm per
        #                            iteration. Default: 0.
        # --min-survival             Minimum number of configurations needed to continue
        #                            the execution of each race (iteration). Default: 0.
        # --num-configurations       Number of configurations to be sampled and evaluated
        #                            at each iteration. Default: 0.
        # --confidence               Confidence level for the elimination test. Default:
        #                            0.95.
        super().__init__(
            solver,
            instance_set,
            sparkle_objectives,
            number_of_runs,
            parent_directory,
            timestamp,
        )
        self.solver = solver
        self.instance_set = instance_set
        # IRACE is single objective: only the first objective is used
        if sparkle_objectives is not None:
            self.sparkle_objective = sparkle_objectives[0]
        else:
            self.sparkle_objective = None

        if feature_data is not None:
            print("WARNING: Instance features currently not supported by IRACE.")

        # Non-positive (or missing) budgets are treated as "not specified"
        self.solver_calls = solver_calls if solver_calls and solver_calls > 0 else None
        self.max_time = max_time if max_time and max_time > 0 else None
        self.solver_cutoff_time = solver_cutoff_time
        self.budget_estimation = budget_estimation
        self.first_test = first_test
        self.mu = mu
        self.max_iterations = max_iterations

    @property
    def instance_file_path(self: IRACEScenario) -> Path:
        """Return the path of the instance file, or None without a directory."""
        if self.directory:
            return self.directory / f"{self.instance_set.name}.txt"
        return None

    @property
    def configurator(self: IRACEScenario) -> type[IRACE]:
        """Return the type of configurator the scenario belongs to."""
        return IRACE

    def create_scenario(self: IRACEScenario) -> None:
        """Create the scenario files for this scenario.

        Calls the base class ``create_scenario`` first, then writes the name of
        every instance path of the instance set to the instance file (one per
        line) and finally creates the IRACE scenario file.
        """
        super().create_scenario()
        with self.instance_file_path.open("w+") as file:
            for instance_path in self.instance_set._instance_paths:
                file.write(f"{instance_path.name}\n")
        self.create_scenario_file()

    def create_scenario_file(self: IRACEScenario) -> Path | None:
        """Create a file from the IRACE scenario.

        Writes the scenario options to ``scenario_file_path`` and verifies the
        file by running the IRACE executable with ``--check``. When a time
        budget is used and no budget estimation was given, the estimation is
        computed as ``solver_cutoff_time / max_time`` and stored on the scenario.

        Returns:
            Path to the created file, or None if the IRACE check failed.

        Raises:
            RuntimeError: If the IRACE executable cannot be located.
        """
        from sparkle.tools.parameters import PCSConvention

        solver_path = self.solver.directory.absolute()
        pcs_path = self.solver.get_pcs_file(port_type=PCSConvention.IRACE).absolute()
        with self.scenario_file_path.open("w") as file:
            file.write(
                f'execDir = "{self.directory.absolute()}"\n'
                'targetRunnerLauncher = "python3"\n'
                f'targetRunner = "{IRACE.configurator_target.absolute()}"\n'
                'targetCmdline = "{targetRunner} '
                f"{solver_path} {self.sparkle_objective} {self.solver_cutoff_time} "
                '{configurationID} {instanceID} {seed} {instance} {targetRunnerArgs}"\n'
                f"deterministic = {1 if self.solver.deterministic else 0}\n"
                f'parameterFile = "{pcs_path.absolute()}"\n'
                f'trainInstancesDir = "{self.instance_set.directory.absolute()}"\n'
                f'trainInstancesFile = "{self.instance_file_path.absolute()}"\n'
                "debugLevel = 1\n"  # The verbosity level of IRACE
            )
            # Only one budget type is written; solver calls take precedence
            if self.solver_calls is not None:
                file.write(f"maxExperiments = {self.solver_calls}\n")
            elif self.max_time is not None:
                file.write(f"maxTime = {self.max_time}\n")
            if self.solver_calls is not None and self.max_time is not None:
                print(
                    "WARNING: Both solver calls and max time specified for scenario. "
                    "This is not supported by IRACE, defaulting to solver calls."
                )
            elif self.solver_calls is None and self.max_time is None:
                print(
                    "WARNING: Neither solver calls nor max time specified. "
                    "Either budget is required for the IRACE scenario."
                )
            if self.max_time is not None:
                if (
                    self.budget_estimation is None
                    and self.solver_cutoff_time is not None
                    and self.solver_cutoff_time < self.max_time
                ):
                    # Auto Estimate
                    self.budget_estimation = self.solver_cutoff_time / self.max_time
                # Write the estimation whether it was given or auto estimated
                if self.budget_estimation is not None:
                    file.write(f"budgetEstimation = {self.budget_estimation}\n")
            if self.first_test is not None:
                file.write(f"firstTest = {self.first_test}\n")
            if self.mu is not None:
                file.write(f"mu = {self.mu}\n")
            if self.max_iterations is not None:
                file.write(f"nbIterations = {self.max_iterations}\n")
        print("Verifying contents of IRACE scenario file and testing solver call...")
        executable = IRACE.configurator_executable()
        if executable is None:
            raise RuntimeError(
                "IRACE executable not found. Please ensure R and the irace "
                "package are installed."
            )
        check_file = subprocess.run(
            [
                f"{executable.absolute()}",
                "-s",
                f"{self.scenario_file_path.absolute()}",
                "--check",
            ],
            capture_output=True,
        )
        if check_file.returncode != 0:
            # Lines starting with '#' are left out of the reported output
            stdout_msg = "\n".join(
                line
                for line in check_file.stdout.decode().splitlines()
                if not line.startswith("#")
            )
            print(
                "An error occured in the IRACE scenario file:\n",
                self.scenario_file_path.read_text(),
                stdout_msg,
                "\n",
                check_file.stderr.decode(),
            )
            return None
        print("IRACE scenario file is valid.")
        return self.scenario_file_path

    def serialise(self: IRACEScenario) -> dict:
        """Serialize the IRACE scenario."""
        return {
            "number_of_runs": self.number_of_runs,
            "solver_calls": self.solver_calls,
            "max_time": self.max_time,
            "solver_cutoff_time": self.solver_cutoff_time,
            "budget_estimation": self.budget_estimation,
            "first_test": self.first_test,
            "mu": self.mu,
            "max_iterations": self.max_iterations,
        }

    @staticmethod
    def from_file(scenario_file: Path) -> IRACEScenario:
        """Reads scenario file and initalises IRACEScenario.

        Args:
            scenario_file: Path to a scenario file in ``key = value`` format,
                as written by ``create_scenario_file``.

        Returns:
            The IRACEScenario built from the file contents.
        """
        with scenario_file.open() as file:
            lines = file.readlines()
        scenario_dict = {
            keyvalue[0]: keyvalue[1]
            for keyvalue in (
                line.split(" = ", maxsplit=1) for line in lines if line.strip() != ""
            )
        }
        # The target command line holds the solver, objective and cutoff time
        _, solver_path, objective, cutoff, _, _, _, _, _ = scenario_dict.pop(
            "targetCmdline"
        ).split(" ")
        scenario_dict["sparkle_objectives"] = [resolve_objective(objective)]
        scenario_dict["solver_cutoff_time"] = int(cutoff)
        scenario_dict["parent_directory"] = scenario_file.parent.parent
        # The number of runs is taken from the entries in the results directory
        scenario_dict["number_of_runs"] = sum(
            1 for _ in (scenario_file.parent / "results").iterdir()
        )
        # Drop the keys that are not arguments of the constructor
        for unused_key in (
            "targetRunner",
            "execDir",
            "targetRunnerLauncher",
            "deterministic",
            "parameterFile",
            "debugLevel",
        ):
            scenario_dict.pop(unused_key)
        instance_set_path = Path(
            scenario_dict.pop("trainInstancesDir").strip().strip('"')
        )
        instance_set = Instance_Set(instance_set_path)
        solver_path = Path(solver_path.strip())
        if solver_path.is_absolute():
            try:
                solver_path = solver_path.relative_to(Path.cwd())
            except ValueError:
                # Solver is outside the working directory: keep the absolute path
                pass
        solver = Solver(solver_path)
        scenario_dict.pop("trainInstancesFile")
        # Replace keys with scenario variable names
        if "budgetEstimation" in scenario_dict:
            scenario_dict["budget_estimation"] = float(
                scenario_dict.pop("budgetEstimation")
            )
        if "firstTest" in scenario_dict:
            scenario_dict["first_test"] = int(scenario_dict.pop("firstTest"))
        if "mu" in scenario_dict:
            scenario_dict["mu"] = int(scenario_dict.pop("mu"))
        if "nbIterations" in scenario_dict:
            scenario_dict["max_iterations"] = int(scenario_dict.pop("nbIterations"))
        if "maxExperiments" in scenario_dict:
            scenario_dict["solver_calls"] = int(scenario_dict.pop("maxExperiments"))
        if "maxTime" in scenario_dict:
            scenario_dict["max_time"] = int(scenario_dict.pop("maxTime"))

        # The timestamp is the last underscore separated part of the directory name
        timestamp = scenario_file.parent.name.split("_")[-1]
        return IRACEScenario(solver, instance_set, **scenario_dict, timestamp=timestamp)