Coverage for sparkle/configurator/implementations/irace.py: 72%
199 statements
« prev ^ index » next coverage.py v7.9.1, created at 2025-07-01 13:21 +0000
« prev ^ index » next coverage.py v7.9.1, created at 2025-07-01 13:21 +0000
1"""Configurator classes to implement IRACE in Sparkle."""
2from __future__ import annotations
3import shutil
4import subprocess
5from pathlib import Path
7from sparkle.configurator.configurator import Configurator, ConfigurationScenario
8from sparkle.solver import Solver
9from sparkle.structures import PerformanceDataFrame, FeatureDataFrame
10from sparkle.instance import InstanceSet, Instance_Set
11from sparkle.types import SparkleObjective, resolve_objective
13from runrunner import Runner, Run
class IRACE(Configurator):
    """Class for IRACE configurator.

    Wraps the R package ``irace``: locating/installing it through ``Rscript``,
    building the command lines for the configuration runs and translating the
    resulting ``.Rdata`` log into a Sparkle configuration.
    """

    configurator_path = Path(__file__).parent.resolve() / "IRACE"
    configurator_target = configurator_path / "irace_target_algorithm.py"

    full_name = "Iterated Racing for Automatic Algorithm Configuration"

    # Extracts the quoted payload of a printed R vector element, e.g. `[1] "x"`.
    r_regex = r'\[\d+\]\s*["‘](?P<data>[^"`]+)["’]'

    def __init__(self: IRACE) -> None:
        """Initialize IRACE configurator."""
        # Lazily resolved by the `version` property (requires an Rscript call).
        self._version: str | None = None
        super().__init__(multi_objective_support=False)

    @staticmethod
    def _extract_r_string(r_stdout: bytes) -> str | None:
        """Return the first quoted value printed by R, or None if there is none."""
        import re
        match = re.search(IRACE.r_regex, r_stdout.decode().strip())
        return None if match is None else match.group("data")

    @property
    def name(self: IRACE) -> str:
        """Returns the name of the configurator."""
        return IRACE.__name__

    @property
    def version(self: IRACE) -> str:
        """Returns the version of the configurator.

        The version is queried from R once and cached. It stays None as long as
        the query fails or its output cannot be parsed.
        """
        if self._version is None:
            version_call = subprocess.run(
                ["Rscript", "-e", "packageVersion('irace')"],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            if version_call.returncode == 0:
                r_data = IRACE._extract_r_string(version_call.stdout)
                if r_data is not None:
                    self._version = r_data
        return self._version

    @staticmethod
    def configurator_executable() -> Path | None:
        """Returns the path to the IRACE executable.

        # NOTE: For the base class this is a class property.
        However as it must be calculated in this case, it is a class method as
        calculated class properties do not exist in Python.

        Every call starts an Rscript process, so callers that need the path
        repeatedly should store the result.

        Returns:
            Path to the executable if it can be found, else None.
        """
        if shutil.which("R") is None:
            return None  # Not installed
        r_call = subprocess.run(
            ["Rscript", "-e", "find.package('irace')"],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
        if r_call.returncode != 0:
            return None  # Not installed
        package_dir = IRACE._extract_r_string(r_call.stdout)
        if package_dir is None:
            return None  # Could not find IRACE?
        return Path(package_dir) / "bin" / "irace"

    @staticmethod
    def scenario_class() -> ConfigurationScenario:
        """Returns the IRACE scenario class."""
        return IRACEScenario

    @staticmethod
    def check_requirements(verbose: bool = False) -> bool:
        """Check that IRACE is installed.

        Args:
            verbose: Whether to emit a warning describing what is missing.

        Returns:
            True if both R and the IRACE executable were found, else False.
        """
        import warnings
        if shutil.which("R") is None:
            if verbose:
                warnings.warn(
                    "IRACE requires R, but R is not installed. "
                    "Please ensure R is installed.")
            return False
        if not IRACE.configurator_executable():
            if verbose:
                warnings.warn(
                    "IRACE executable not found. Please ensure IRACE is installed "
                    f"in the expected Path ({IRACE.configurator_path}).")
            return False
        return True

    @staticmethod
    def download_requirements() -> None:
        """Download IRACE.

        Installs the irace R package (with dependencies) into the user library.

        Raises:
            RuntimeError: If R is not installed.
        """
        if shutil.which("R") is None:
            raise RuntimeError("IRACE requires R, but R is not installed.")
        # Ensure personal library exists, do not raise warnings
        subprocess.run([
            "Rscript", "-e",
            "dir.create(path = Sys.getenv('R_LIBS_USER'), "
            "showWarnings = FALSE, recursive = TRUE)"],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        install_irace = subprocess.run(
            ["Rscript", "-e",
             # Install the irace package
             "install.packages('irace', "
             "lib=Sys.getenv('R_LIBS_USER'), "  # Install in user library
             "dependencies = TRUE, "  # Ensure dependencies are installed
             "repos='https://cloud.r-project.org')"],  # Set source
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        install_log = (f"{install_irace.stdout.decode()}\n\n"
                       f"{install_irace.stderr.decode()}")
        print(install_log)
        if install_irace.returncode != 0:
            import warnings
            warnings.warn("IRACE had a non-zero return code during installation!\n\n"
                          f"{install_log}")

    def configure(self: IRACE,
                  scenario: ConfigurationScenario,
                  data_target: PerformanceDataFrame,
                  validate_after: bool = True,
                  sbatch_options: list[str] = None,
                  slurm_prepend: str | list[str] | Path = None,
                  num_parallel_jobs: int = None,
                  base_dir: Path = None,
                  run_on: Runner = Runner.SLURM) -> Run:
        """Start configuration job.

        Args:
            scenario: ConfigurationScenario to execute.
            data_target: PerformanceDataFrame where to store the found configurations
            validate_after: Whether to validate the configuration on the training set
                afterwards or not.
            sbatch_options: List of slurm batch options to use. Default: no options.
            slurm_prepend: Slurm script to prepend to the sbatch
            num_parallel_jobs: The maximum number of jobs to run in parallel
            base_dir: The base_dir of RunRunner where the sbatch scripts will be placed
            run_on: On which platform to run the jobs. Default: Slurm.

        Returns:
            A RunRunner Run object.

        Raises:
            RuntimeError: If the IRACE executable cannot be located.
        """
        if sbatch_options is None:  # Avoid a shared mutable default argument
            sbatch_options = []
        scenario.create_scenario()
        configuration_ids = scenario.configuration_ids
        # Locating the executable starts an R process: resolve it once here
        # instead of once per generated command.
        executable = IRACE.configurator_executable()
        if executable is None:
            raise RuntimeError("IRACE executable not found. "
                               "Please ensure R and IRACE are installed.")
        executable = executable.absolute()
        # Create command to call IRACE. Create plural based on number of runs
        # TODO: Setting seeds like this is weird and should be inspected.
        seeds = range(scenario.number_of_runs)
        output_files = [
            scenario.results_directory.absolute() / f"output_{job_idx}.Rdata"
            for job_idx in configuration_ids]
        cmds = [f"python3 {Configurator.configurator_cli_path.absolute()} "
                f"{IRACE.__name__} {output_path} {data_target.csv_filepath} "
                f"{scenario.scenario_file_path} {configuration_id} "
                f"{executable} "
                f"--scenario {scenario.scenario_file_path} "
                f"--log-file {output_path} "
                f"--seed {seed}" for seed, configuration_id, output_path
                in zip(seeds, configuration_ids, output_files)]
        return super().configure(
            configuration_commands=cmds,
            data_target=data_target,
            output=output_files,
            scenario=scenario,
            configuration_ids=configuration_ids,
            sbatch_options=sbatch_options,
            slurm_prepend=slurm_prepend,
            validate_after=validate_after,
            num_parallel_jobs=num_parallel_jobs,
            base_dir=base_dir,
            run_on=run_on
        )

    @staticmethod
    def _r_table_to_config_str(r_table: str) -> str:
        """Convert a printed one-row R data frame into a ``--name value`` string.

        The table is read as alternating lines: even lines hold column names,
        odd lines hold the values, each value line starting with the row name.
        Tokens are collected per line so that values of consecutive chunks can
        never be glued together, whatever the column padding is.
        """
        names: list[str] = []
        values: list[str] = []
        for index, line in enumerate(r_table.splitlines()):
            if index % 2 == 0:  # Even lines are headers
                names.extend(line.split())
            else:  # Odd lines are parameter values, first element is the row name
                values.extend(line.split()[1:])
        # The first column is the configuration ID, which is not a parameter
        return "".join(
            f"--{parameter} {value} "
            for parameter, value in zip(names[1:], values[1:])
            if parameter != ".PARENT." and value not in ("NA", "<NA>"))

    @staticmethod
    def organise_output(output_source: Path,
                        output_target: Path,
                        scenario: IRACEScenario,
                        configuration_id: str) -> None | dict:
        """Method to restructure and clean up after a single configurator call.

        Args:
            output_source: The ``.Rdata`` log file written by IRACE.
            output_target: Passed on to Configurator.save_configuration.
            scenario: The scenario the run belongs to.
            configuration_id: Identifier under which the configuration is saved.

        Returns:
            The result of Configurator.save_configuration.

        Raises:
            RuntimeError: If R fails or prints no configuration.
        """
        # Print the elite configuration of the last iteration as an R table
        get_config = subprocess.run(
            ["Rscript", "-e",
             'library("irace"); '
             f'load("{output_source}"); '
             "last <- length(iraceResults$iterationElites); "
             "id <- iraceResults$iterationElites[last]; "
             "print(getConfigurationById(iraceResults, ids = id))"],
            capture_output=True)
        r_table = get_config.stdout.decode()
        if get_config.returncode != 0 or r_table.strip() == "":
            raise RuntimeError("Failed to get configuration from IRACE file "
                               f"{output_source}:\n"
                               f"{r_table}\n"
                               f"{get_config.stderr.decode()}")

        configuration = Solver.config_str_to_dict(
            IRACE._r_table_to_config_str(r_table))
        return Configurator.save_configuration(scenario, configuration_id,
                                               configuration, output_target)

    def get_status_from_logs(self: IRACE) -> None:
        """Method to scan the log files of the configurator for warnings."""
        raise NotImplementedError
class IRACEScenario(ConfigurationScenario):
    """Class for IRACE scenario."""

    def __init__(self: IRACEScenario,
                 solver: Solver,
                 instance_set: InstanceSet,
                 sparkle_objectives: list[SparkleObjective],
                 number_of_runs: int,
                 parent_directory: Path,
                 solver_calls: int = None,
                 solver_cutoff_time: int = None,
                 max_time: int = None,
                 budget_estimation: float = None,
                 first_test: int = None,
                 mu: int = None,
                 max_iterations: int = None,
                 feature_data: FeatureDataFrame = None,
                 )\
            -> None:
        """Initialize scenario paths and names.

        Args:
            solver: Solver that should be configured.
            instance_set: Instances object for the scenario.
            sparkle_objectives: SparkleObjectives used for each run of the configuration.
                Will be simplified to the first objective.
            number_of_runs: The number of configurator runs to perform
                for configuring the solver.
            parent_directory: Path where the scenario files will be placed.
            solver_calls: The number of times the solver is called for each
                configuration run. [MaxExperiments]
            solver_cutoff_time: The maximum time allowed for each individual run during
                configuration.
            max_time: The time budget (CPU) allocated for the sum of solver calls
                done by the configurator in seconds. [MaxTime]
            budget_estimation: Fraction (smaller than 1) of the budget used to estimate
                the mean computation time of a configuration. Only used when maxTime > 0.
                Default: Computed as cutoff_time / max_time. [BudgetEstimation]
            first_test: Specifies how many instances are evaluated before the first
                elimination test. IRACE Default: 5. [firstTest]
            mu: Parameter used to define the number of configurations sampled and
                evaluated at each iteration. IRACE Default: 5. [mu]
            max_iterations: Maximum number of iterations to be executed. Each iteration
                involves the generation of new configurations and the use of racing to
                select the best configurations. By default (with 0), irace calculates a
                minimum number of iterations as N^iter = ⌊2 + log2 N param⌋, where
                N^param is the number of non-fixed parameters to be tuned.
                Setting this parameter may make irace stop sooner than it should without
                using all the available budget. We recommend to use the default value.
            feature_data: FeatureDataFrame object with the feature data.
                Currently not supported by IRACE.
        """
        # Other possible arguments that are not added yet to Sparkle:
        # --test-num-elites
        #     Number of elite configurations returned by irace that will be
        #     tested if test instances are provided. Default: 1.
        # --test-iteration-elites
        #     Enable/disable testing the elite configurations found at each
        #     iteration. Default: 0.
        # --test-type
        #     Statistical test used for elimination. The default value selects
        #     t-test if capping is enabled or F-test, otherwise. Valid values
        #     are: F-test (Friedman test), t-test (pairwise t-tests with no
        #     correction), t-test-bonferroni (t-test with Bonferroni's
        #     correction for multiple comparisons), t-test-holm (t-test with
        #     Holm's correction for multiple comparisons).
        # --each-test
        #     Number of instances evaluated between elimination tests.
        #     Default: 1.
        # --load-balancing
        #     Enable/disable load-balancing when executing experiments in
        #     parallel. Load-balancing makes better use of computing resources,
        #     but increases communication overhead. If this overhead is large,
        #     disabling load-balancing may be faster. Default: 1.
        # --mpi
        #     Enable/disable MPI. Use Rmpi to execute targetRunner in parallel
        #     (parameter parallel is the number of slaves). Default: 0.
        # --batchmode
        #     Specify how irace waits for jobs to finish when targetRunner
        #     submits jobs to a batch cluster: sge, pbs, torque, slurm or
        #     htcondor. targetRunner must submit jobs to the cluster using, for
        #     example, qsub. Default: 0.
        # --digits
        #     Maximum number of decimal places that are significant for
        #     numerical (real) parameters. Default: 4.
        # --soft-restart
        #     Enable/disable the soft restart strategy that avoids premature
        #     convergence of the probabilistic model. Default: 1.
        # --soft-restart-threshold
        #     Soft restart threshold value for numerical parameters. If NA,
        #     NULL or "", it is computed as 10^-digits.
        # -e,--elitist
        #     Enable/disable elitist irace. Default: 1.
        # --elitist-new-instances
        #     Number of instances added to the execution list before previous
        #     instances in elitist irace. Default: 1.
        # --elitist-limit
        #     In elitist irace, maximum number per race of elimination tests
        #     that do not eliminate a configuration. Use 0 for no limit.
        #     Default: 2.
        # --capping
        #     Enable the use of adaptive capping, a technique designed for
        #     minimizing the computation time of configurations. This is only
        #     available when elitist is active. Default: 0.
        # --capping-type
        #     Measure used to obtain the execution bound from the performance
        #     of the elite configurations: median, mean, worst, best.
        #     Default: median.
        # --bound-type
        #     Method to calculate the mean performance of elite configurations:
        #     candidate or instance. Default: candidate.
        # --bound-max
        #     Maximum execution bound for targetRunner. It must be specified
        #     when capping is enabled. Default: 0.
        # --bound-digits
        #     Precision used for calculating the execution time. It must be
        #     specified when capping is enabled. Default: 0.
        # --bound-par
        #     Penalization constant for timed out executions (executions that
        #     reach boundMax execution time). Default: 1.
        # --bound-as-timeout
        #     Replace the configuration cost of bounded executions with
        #     boundMax. Default: 1.
        # --postselection
        #     Percentage of the configuration budget used to perform a
        #     postselection race of the best configurations of each iteration
        #     after the execution of irace. Default: 0.
        # --iterations
        #     Maximum number of iterations. Default: 0.
        # --experiments-per-iteration
        #     Number of runs of the target algorithm per iteration. Default: 0.
        # --min-survival
        #     Minimum number of configurations needed to continue the execution
        #     of each race (iteration). Default: 0.
        # --num-configurations
        #     Number of configurations to be sampled and evaluated at each
        #     iteration. Default: 0.
        # --confidence
        #     Confidence level for the elimination test. Default: 0.95.
        super().__init__(solver, instance_set, sparkle_objectives,
                         number_of_runs, parent_directory)
        self.solver = solver
        self.instance_set = instance_set
        # IRACE is single objective: keep the first one (None or empty -> None)
        self.sparkle_objective = sparkle_objectives[0] if sparkle_objectives else None

        if feature_data is not None:
            print("WARNING: Instance features currently not supported by IRACE.")

        # Non-positive budgets are treated as "not specified"
        self.solver_calls = solver_calls if solver_calls and solver_calls > 0 else None
        self.max_time = max_time if max_time and max_time > 0 else None
        self.solver_cutoff_time = solver_cutoff_time
        self.budget_estimation = budget_estimation
        self.first_test = first_test
        self.mu = mu
        self.max_iterations = max_iterations

        # Pathing
        self.instance_file_path = self.directory / f"{self.instance_set.name}.txt"
        self.validation = self.directory / "validation"
        self.results_directory = self.directory / "results"

    @property
    def configurator(self: IRACEScenario) -> IRACE:
        """Return the type of configurator the scenario belongs to."""
        return IRACE

    def create_scenario(self: IRACEScenario) -> None:
        """Create the scenario directory, instance file and scenario file.

        This prepares all the necessary subdirectories related to configuration.
        Removes any existing directory if it overlaps with the scenario name.
        """
        # Set up directories
        shutil.rmtree(self.directory, ignore_errors=True)  # Clear directory
        self.directory.mkdir(exist_ok=True, parents=True)
        self.tmp.mkdir(exist_ok=True)
        self.validation.mkdir(exist_ok=True)
        self.results_directory.mkdir(exist_ok=True)

        # One instance name per line; they are resolved against trainInstancesDir
        with self.instance_file_path.open("w+") as file:
            for instance_path in self.instance_set._instance_paths:
                file.write(f"{instance_path.name}\n")
        self.create_scenario_file()

    def create_scenario_file(self: IRACEScenario) -> Path:
        """Create a file from the IRACE scenario.

        After writing, the file is verified by running IRACE with ``--check``.

        Returns:
            Path to the created file, or None if IRACE rejected the file.

        Raises:
            RuntimeError: If the IRACE executable cannot be located.
        """
        super().create_scenario_file()
        from sparkle.tools.parameters import PCSConvention
        solver_path = self.solver.directory.absolute()
        pcs_path = self.solver.get_pcs_file(port_type=PCSConvention.IRACE).absolute()
        with self.scenario_file_path.open("w") as file:
            file.write(
                f'execDir = "{self.directory.absolute()}"\n'
                'targetRunnerLauncher = "python3"\n'
                f'targetRunner = "{IRACE.configurator_target.absolute()}"\n'
                'targetCmdline = "{targetRunner} '
                f"{solver_path} {self.sparkle_objective} {self.solver_cutoff_time} "
                '{configurationID} {instanceID} {seed} {instance} {targetRunnerArgs}"\n'
                f"deterministic = {1 if self.solver.deterministic else 0}\n"
                f'parameterFile = "{pcs_path}"\n'
                f'trainInstancesDir = "{self.instance_set.directory.absolute()}"\n'
                f'trainInstancesFile = "{self.instance_file_path.absolute()}"\n'
                "debugLevel = 1\n"  # The verbosity level of IRACE
            )
            # IRACE accepts a single budget type; solver calls take precedence
            if self.solver_calls is not None:
                file.write(f"maxExperiments = {self.solver_calls}\n")
            elif self.max_time is not None:
                file.write(f"maxTime = {self.max_time}\n")
            if self.solver_calls is not None and self.max_time is not None:
                print("WARNING: Both solver calls and max time specified for scenario. "
                      "This is not supported by IRACE, defaulting to solver calls.")
            elif self.solver_calls is None and self.max_time is None:
                print("WARNING: Neither solver calls nor max time specified. "
                      "Either budget is required for the IRACE scenario.")
            if self.max_time is not None:
                if (self.budget_estimation is None
                        and self.solver_cutoff_time is not None
                        and self.solver_cutoff_time < self.max_time):
                    # Auto Estimate
                    self.budget_estimation = self.solver_cutoff_time / self.max_time
                # Written for estimated as well as user supplied values
                if self.budget_estimation is not None:
                    file.write(f"budgetEstimation = {self.budget_estimation}\n")
            if self.first_test is not None:
                file.write(f"firstTest = {self.first_test}\n")
            if self.mu is not None:
                file.write(f"mu = {self.mu}\n")
            if self.max_iterations is not None:
                file.write(f"nbIterations = {self.max_iterations}\n")
        print("Verifying contents of IRACE scenario file and testing solver call...")
        executable = IRACE.configurator_executable()
        if executable is None:
            raise RuntimeError("IRACE executable not found. "
                               "Please ensure R and IRACE are installed.")
        check_file = subprocess.run(
            [f"{executable.absolute()}",
             "-s", f"{self.scenario_file_path.absolute()}", "--check"],
            capture_output=True)
        if check_file.returncode != 0:
            # Lines starting with '#' are left out of the report
            stdout_msg = "\n".join([
                line for line in check_file.stdout.decode().splitlines()
                if not line.startswith("#")])
            print("An error occured in the IRACE scenario file:\n",
                  self.scenario_file_path.read_text(),
                  stdout_msg, "\n",
                  check_file.stderr.decode())
            return None
        print("IRACE scenario file is valid.")
        return self.scenario_file_path

    def serialise(self: IRACEScenario) -> dict:
        """Serialize the IRACE scenario.

        Returns:
            Dictionary with the run count, budget and tuning settings.
        """
        return {
            "number_of_runs": self.number_of_runs,
            "solver_calls": self.solver_calls,
            "max_time": self.max_time,
            "solver_cutoff_time": self.solver_cutoff_time,
            "budget_estimation": self.budget_estimation,
            "first_test": self.first_test,
            "mu": self.mu,
            "max_iterations": self.max_iterations,
        }

    @staticmethod
    def from_file(scenario_file: Path) -> IRACEScenario:
        """Reads scenario file and initalises IRACEScenario.

        Args:
            scenario_file: Scenario file as written by create_scenario_file.

        Returns:
            The scenario reconstructed from the file. The number of runs is
            taken from the number of entries in the sibling results directory.
        """
        scenario_dict = {}
        with scenario_file.open() as file:  # Ensure the file handle is closed
            for line in file:
                key, separator, value = line.partition(" = ")
                if line.strip() == "" or not separator:
                    continue  # Skip blank lines and lines without an assignment
                scenario_dict[key] = value
        # Layout: {targetRunner} <solver> <objective> <cutoff> {configurationID} ...
        _, solver_path, objective, cutoff, _, _, _, _, _ =\
            scenario_dict.pop("targetCmdline").split(" ")
        scenario_dict["sparkle_objectives"] = [resolve_objective(objective)]
        scenario_dict["solver_cutoff_time"] = int(cutoff)
        scenario_dict["parent_directory"] = scenario_file.parent.parent
        scenario_dict["number_of_runs"] =\
            sum(1 for _ in (scenario_file.parent / "results").iterdir())
        instance_set_path =\
            Path(scenario_dict.pop("trainInstancesDir").strip().strip('"'))
        instance_set = Instance_Set(instance_set_path)
        solver = Solver(Path(solver_path.strip()))
        # Entries that are regenerated when the scenario is created
        for ignored_key in ("targetRunner", "execDir", "targetRunnerLauncher",
                            "deterministic", "parameterFile", "debugLevel",
                            "trainInstancesFile"):
            scenario_dict.pop(ignored_key, None)
        # Replace keys with scenario variable names
        conversions = {
            "budgetEstimation": ("budget_estimation", float),
            "firstTest": ("first_test", int),
            "mu": ("mu", int),
            "nbIterations": ("max_iterations", int),
            "maxExperiments": ("solver_calls", int),
            "maxTime": ("max_time", int),
        }
        for irace_key, (argument, cast) in conversions.items():
            if irace_key in scenario_dict:
                scenario_dict[argument] = cast(scenario_dict.pop(irace_key))

        return IRACEScenario(solver, instance_set, **scenario_dict)