Coverage for sparkle/configurator/implementations/irace.py: 50% (170 statements)

"""Configurator classes to implement IRACE in Sparkle."""
from __future__ import annotations
import shutil
import subprocess
from pathlib import Path

from sparkle.configurator.configurator import Configurator, ConfigurationScenario
from sparkle.solver import Solver
from sparkle.structures import PerformanceDataFrame, FeatureDataFrame
from sparkle.instance import InstanceSet, Instance_Set
from sparkle.types import SparkleObjective, resolve_objective

from runrunner import Runner, Run


class IRACE(Configurator):
    """Class for IRACE configurator."""
    configurator_path = Path(__file__).parent.parent.parent.resolve() /\
        "Components/irace-v4.2.0"
    configurator_package = configurator_path / "irace_4.2.0.tar"
    # NOTE: There are possible dependencies that we do not install here.
    # TODO: Determine if we should add them or not.
    package_dependencies = ["codetools_0.2-20.tar", "data.table_1.16.4.tar",
                            "matrixStats_1.5.0.tar", "spacefillr_0.3.3.tar"]
    configurator_executable = configurator_path / "irace" / "bin" / "irace"
    configurator_ablation_executable = configurator_path / "irace" / "bin" / "ablation"
    configurator_target = configurator_path / "irace_target_algorithm.py"

    version = "4.2.0"
    full_name = "Iterated Racing for Automatic Algorithm Configuration"

    def __init__(self: Configurator,
                 output_path: Path,
                 base_dir: Path,
                 ) -> None:
        """Initialize IRACE configurator."""
        output_path = output_path / IRACE.__name__
        output_path.mkdir(parents=True, exist_ok=True)
        super().__init__(output_path=output_path,
                         base_dir=base_dir,
                         tmp_path=output_path / "tmp",
                         multi_objective_support=False)

    @property
    def name(self: IRACE) -> str:
        """Returns the name of the configurator."""
        return IRACE.__name__

    @staticmethod
    def scenario_class() -> ConfigurationScenario:
        """Returns the IRACE scenario class."""
        return IRACEScenario

    def configure(self: IRACE,
                  scenario: ConfigurationScenario,
                  data_target: PerformanceDataFrame,
                  validate_after: bool = True,
                  sbatch_options: list[str] = [],
                  slurm_prepend: str | list[str] | Path = None,
                  num_parallel_jobs: int = None,
                  base_dir: Path = None,
                  run_on: Runner = Runner.SLURM) -> Run:
        """Start configuration job.

        Args:
            scenario: ConfigurationScenario to execute.
            data_target: PerformanceDataFrame in which to store the found
                configurations.
            validate_after: Whether to validate the configuration on the training set
                afterwards or not.
            sbatch_options: List of Slurm batch options to use.
            slurm_prepend: Slurm script to prepend to the sbatch script.
            num_parallel_jobs: The maximum number of jobs to run in parallel.
            base_dir: The base_dir of RunRunner where the sbatch scripts will be
                placed.
            run_on: On which platform to run the jobs. Default: Slurm.

        Returns:
            A RunRunner Run object.
        """
        scenario.create_scenario()
        output_csv = scenario.validation / "configurations.csv"
        output_csv.parent.mkdir(exist_ok=True, parents=True)

        # Create one IRACE command per configurator run.
        # We set the seed over the last n run ids in the dataframe.
        seeds = data_target.run_ids[data_target.num_runs - scenario.number_of_runs:]
        output_files = [
            scenario.results_directory.absolute() / f"output_{job_idx}.Rdata"
            for job_idx in seeds]
        cmds = [f"python3 {Configurator.configurator_cli_path.absolute()} "
                f"{IRACE.__name__} {output_path} {data_target.csv_filepath} "
                f"{scenario.scenario_file_path} {seed} "
                f"{IRACE.configurator_executable.absolute()} "
                f"--scenario {scenario.scenario_file_path} "
                f"--log-file {output_path} "
                f"--seed {seed}" for seed, output_path in zip(seeds, output_files)]
        return super().configure(
            configuration_commands=cmds,
            data_target=data_target,
            output=output_files,
            scenario=scenario,
            sbatch_options=sbatch_options,
            slurm_prepend=slurm_prepend,
            validation_ids=seeds if validate_after else None,
            num_parallel_jobs=num_parallel_jobs,
            base_dir=base_dir,
            run_on=run_on
        )
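
    # Illustrative usage sketch (not part of the original module; `solver`,
    # `instance_set`, `performance_data` and the "PAR10" objective are assumed
    # placeholders, and the usual entry point is Sparkle's CLI rather than a
    # direct call):
    #
    #   irace = IRACE(output_path=Path("Output"), base_dir=Path("Tmp"))
    #   scenario = IRACEScenario(solver, instance_set,
    #                            sparkle_objectives=[resolve_objective("PAR10")],
    #                            parent_directory=irace.output_path,
    #                            number_of_runs=2, solver_calls=100,
    #                            cutoff_time=60)
    #   run = irace.configure(scenario, data_target=performance_data,
    #                         run_on=Runner.LOCAL)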

    @staticmethod
    def organise_output(output_source: Path,
                        output_target: Path,
                        scenario: IRACEScenario,
                        run_id: int) -> None | dict:
        """Method to restructure and clean up after a single configurator call."""
        from filelock import FileLock
        get_config = subprocess.run(
            ["Rscript", "-e",
             'library("irace"); '
             f'load("{output_source}"); '
             "last <- length(iraceResults$iterationElites); "
             "id <- iraceResults$iterationElites[last]; "
             "print(getConfigurationById(iraceResults, ids = id))"],
            capture_output=True)
        r_table = get_config.stdout.decode()
        if get_config.returncode != 0 or r_table.strip() == "":
            raise RuntimeError("Failed to get configuration from IRACE file "
                               f"{output_source}:\n"
                               f"{get_config.stdout.decode()}\n"
                               f"{get_config.stderr.decode()}")

        # Join the table header and content together
        header = ""
        content = ""
        for i, line in enumerate(r_table.splitlines()):
            if i & 1 == 0:  # Even lines are headers
                header += line
            else:  # Odd lines are parameter values
                # First element is the ID
                line = " ".join(line.split(" ")[1:])
                content += line
        # First header item is the ID
        header = [x for x in header.split(" ") if x != ""][1:]
        content = [x for x in content.split(" ") if x != ""][1:]
        configuration = ""
        for parameter, value in zip(header, content):
            if parameter != ".PARENT." and value != "NA" and value != "<NA>":
                configuration += f"--{parameter} {value} "
        configuration = Solver.config_str_to_dict(configuration)
        if output_target is None or not output_target.exists():
            return configuration

        time_stamp = scenario.scenario_file_path.stat().st_mtime
        configuration["configuration_id"] =\
            f"{IRACE.__name__}_{time_stamp}_{run_id}"
        instance_names = scenario.instance_set.instance_names
        lock = FileLock(f"{output_target}.lock")
        with lock.acquire(timeout=60):
            performance_data = PerformanceDataFrame(output_target)
            # Resolve absolute path to Solver column
            solver = [s for s in performance_data.solvers
                      if Path(s).name == scenario.solver.name][0]
            # The instance paths in the instance set are absolute,
            # so match the performance data instances by name only.
            instances = [instance for instance in performance_data.instances
                         if Path(instance).name in instance_names]
            # We do not set the seed in the dataframe,
            # as that should be part of the configuration.
            performance_data.set_value(
                value=[str(configuration)],
                solver=solver,
                instance=instances,
                objective=None,
                run=run_id,
                solver_fields=[PerformanceDataFrame.column_configuration]
            )
            performance_data.save_csv()
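
    # Note on the parsing above (an illustration inferred from the code, not a
    # verified IRACE transcript): the R print() of the configuration is assumed
    # to interleave header and value lines when the data.frame wraps, e.g.:
    #
    #      .ID. param1 param2 .PARENT.
    #   1    42    0.5     on       NA
    #
    # Even lines provide the column names, odd lines the values (prefixed by the
    # row label); the ID/.PARENT. columns and NA entries are discarded and the
    # rest is rebuilt as a "--param value" string for Solver.config_str_to_dict.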

    def get_status_from_logs(self: Configurator) -> None:
        """Method to scan the log files of the configurator for warnings."""
        raise NotImplementedError


class IRACEScenario(ConfigurationScenario):
    """Class for IRACE scenario."""

    def __init__(self: ConfigurationScenario,
                 solver: Solver,
                 instance_set: InstanceSet,
                 sparkle_objectives: list[SparkleObjective],
                 parent_directory: Path,
                 number_of_runs: int = None, solver_calls: int = None,
                 cutoff_time: int = None,
                 max_time: int = None,
                 budget_estimation: float = None,
                 first_test: int = None,
                 mu: int = None,
                 max_iterations: int = None,
                 feature_data: FeatureDataFrame = None,
                 ) -> None:
        """Initialize scenario paths and names.

        Args:
            solver: Solver that should be configured.
            instance_set: Instances object for the scenario.
            sparkle_objectives: SparkleObjectives used for each run of the
                configuration. Will be simplified to the first objective.
            parent_directory: Path where the scenario files will be placed.
            number_of_runs: The number of configurator runs to perform
                for configuring the solver.
            solver_calls: The number of times the solver is called for each
                configuration run. [maxExperiments]
            cutoff_time: The maximum time allowed for each individual run during
                configuration.
            max_time: The time budget (CPU) allocated for the sum of solver calls
                done by the configurator in seconds. [maxTime]
            budget_estimation: Fraction (smaller than 1) of the budget used to
                estimate the mean computation time of a configuration. Only used
                when maxTime > 0. Default: Computed as cutoff_time / max_time.
                [budgetEstimation]
            first_test: Specifies how many instances are evaluated before the first
                elimination test. IRACE default: 5. [firstTest]
            mu: Parameter used to define the number of configurations sampled and
                evaluated at each iteration. IRACE default: 5. [mu]
            max_iterations: Maximum number of iterations to be executed. Each
                iteration involves the generation of new configurations and the use
                of racing to select the best configurations. By default (with 0),
                irace calculates a minimum number of iterations as
                N^iter = ⌊2 + log2 N^param⌋, where N^param is the number of
                non-fixed parameters to be tuned. Setting this parameter may make
                irace stop sooner than it should without using all the available
                budget. We recommend using the default value. [nbIterations]
            feature_data: FeatureDataFrame object with the feature data.
                Currently not supported by IRACE.
        """
        """
        Other possible arguments that are not yet added to Sparkle:
        --test-num-elites         Number of elite configurations returned by irace
                                  that will be tested if test instances are
                                  provided. Default: 1.
        --test-iteration-elites   Enable/disable testing the elite configurations
                                  found at each iteration. Default: 0.
        --test-type               Statistical test used for elimination. The
                                  default value selects t-test if capping is
                                  enabled or F-test, otherwise. Valid values are:
                                  F-test (Friedman test), t-test (pairwise t-tests
                                  with no correction), t-test-bonferroni (t-test
                                  with Bonferroni's correction for multiple
                                  comparisons), t-test-holm (t-test with Holm's
                                  correction for multiple comparisons).
        --each-test               Number of instances evaluated between
                                  elimination tests. Default: 1.
        --load-balancing          Enable/disable load-balancing when executing
                                  experiments in parallel. Load-balancing makes
                                  better use of computing resources, but increases
                                  communication overhead. If this overhead is
                                  large, disabling load-balancing may be faster.
                                  Default: 1.
        --mpi                     Enable/disable MPI. Use Rmpi to execute
                                  targetRunner in parallel (parameter parallel is
                                  the number of slaves). Default: 0.
        --batchmode               Specify how irace waits for jobs to finish when
                                  targetRunner submits jobs to a batch cluster:
                                  sge, pbs, torque, slurm or htcondor.
                                  targetRunner must submit jobs to the cluster
                                  using, for example, qsub. Default: 0.
        --digits                  Maximum number of decimal places that are
                                  significant for numerical (real) parameters.
                                  Default: 4.
        --soft-restart            Enable/disable the soft restart strategy that
                                  avoids premature convergence of the
                                  probabilistic model. Default: 1.
        --soft-restart-threshold  Soft restart threshold value for numerical
                                  parameters. If NA, NULL or "", it is computed
                                  as 10^-digits.
        -e,--elitist              Enable/disable elitist irace. Default: 1.
        --elitist-new-instances   Number of instances added to the execution list
                                  before previous instances in elitist irace.
                                  Default: 1.
        --elitist-limit           In elitist irace, maximum number per race of
                                  elimination tests that do not eliminate a
                                  configuration. Use 0 for no limit. Default: 2.
        --capping                 Enable the use of adaptive capping, a technique
                                  designed for minimizing the computation time of
                                  configurations. This is only available when
                                  elitist is active. Default: 0.
        --capping-type            Measure used to obtain the execution bound from
                                  the performance of the elite configurations:
                                  median, mean, worst, best. Default: median.
        --bound-type              Method to calculate the mean performance of
                                  elite configurations: candidate or instance.
                                  Default: candidate.
        --bound-max               Maximum execution bound for targetRunner. It
                                  must be specified when capping is enabled.
                                  Default: 0.
        --bound-digits            Precision used for calculating the execution
                                  time. It must be specified when capping is
                                  enabled. Default: 0.
        --bound-par               Penalization constant for timed out executions
                                  (executions that reach boundMax execution time).
                                  Default: 1.
        --bound-as-timeout        Replace the configuration cost of bounded
                                  executions with boundMax. Default: 1.
        --postselection           Percentage of the configuration budget used to
                                  perform a postselection race of the best
                                  configurations of each iteration after the
                                  execution of irace. Default: 0.
        --iterations              Maximum number of iterations. Default: 0.
        --experiments-per-iteration  Number of runs of the target algorithm per
                                  iteration. Default: 0.
        --min-survival            Minimum number of configurations needed to
                                  continue the execution of each race (iteration).
                                  Default: 0.
        --num-configurations      Number of configurations to be sampled and
                                  evaluated at each iteration. Default: 0.
        --confidence              Confidence level for the elimination test.
                                  Default: 0.95.
        """
        super().__init__(solver, instance_set, sparkle_objectives, parent_directory)
        self.solver = solver
        self.instance_set = instance_set
        if sparkle_objectives is not None:
            self.sparkle_objective = sparkle_objectives[0]
        else:
            self.sparkle_objective = None

        if feature_data is not None:
            print("WARNING: Instance features currently not supported by IRACE.")

        self.number_of_runs = number_of_runs
        self.solver_calls = solver_calls if solver_calls and solver_calls > 0 else None
        self.max_time = max_time if max_time and max_time > 0 else None
        self.cutoff_time = cutoff_time
        self.budget_estimation = budget_estimation
        self.first_test = first_test
        self.mu = mu
        self.max_iterations = max_iterations

        # Pathing
        self.instance_file_path = self.directory / f"{self.instance_set.name}.txt"
        self.tmp = self.directory / "tmp"
        self.validation = self.directory / "validation"
        self.results_directory = self.directory / "results"
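
    # Illustrative sketch (assumption, not part of the original module; `solver`,
    # `instance_set`, `objective` and `parent_dir` are placeholders): a scenario
    # is typically given exactly one budget, either `solver_calls`
    # (maxExperiments) or `max_time` (maxTime), e.g.:
    #
    #   # Run-count budget: at most 500 target algorithm runs per configurator run
    #   IRACEScenario(solver, instance_set, [objective], parent_dir,
    #                 number_of_runs=5, solver_calls=500, cutoff_time=60)
    #   # Time budget: 3600s CPU; budgetEstimation is then derived as 60 / 3600
    #   IRACEScenario(solver, instance_set, [objective], parent_dir,
    #                 number_of_runs=5, max_time=3600, cutoff_time=60)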

    def create_scenario(self: IRACEScenario) -> None:
        """Create scenario with solver and instances in the parent directory.

        This prepares all the necessary subdirectories related to configuration.
        Removes any existing directory if it overlaps with the scenario name.
        """
        # Set up directories
        shutil.rmtree(self.directory, ignore_errors=True)  # Clear directory
        self.directory.mkdir(exist_ok=True, parents=True)
        self.tmp.mkdir(exist_ok=True)
        self.validation.mkdir(exist_ok=True)
        self.results_directory.mkdir(exist_ok=True)

        with self.instance_file_path.open("w+") as file:
            for instance_path in self.instance_set._instance_paths:
                file.write(f"{instance_path.name}\n")
        self.create_scenario_file()
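
    # Illustrative note: the instance file written above lists one instance file
    # name per line (the names below are placeholders), e.g.:
    #
    #   instance_001.cnf
    #   instance_002.cnf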

    def create_scenario_file(self: ConfigurationScenario) -> Path:
        """Create a file from the IRACE scenario.

        Returns:
            Path to the created file.
        """
        from sparkle.tools.parameters import PCSConvention
        solver_path = self.solver.directory.absolute()
        pcs_path = self.solver.get_pcs_file(port_type=PCSConvention.IRACE).absolute()
        with self.scenario_file_path.open("w") as file:
            file.write(
                f'execDir = "{self.directory.absolute()}"\n'
                'targetRunnerLauncher = "python3"\n'
                f'targetRunner = "{IRACE.configurator_target.absolute()}"\n'
                'targetCmdline = "{targetRunner} '
                f"{solver_path} {self.sparkle_objective} {self.cutoff_time} "
                '{configurationID} {instanceID} {seed} {instance} {targetRunnerArgs}"\n'
                f"deterministic = {1 if self.solver.deterministic else 0}\n"
                f'parameterFile = "{pcs_path.absolute()}"\n'
                f'trainInstancesDir = "{self.instance_set.directory.absolute()}"\n'
                f'trainInstancesFile = "{self.instance_file_path.absolute()}"\n'
                "debugLevel = 1\n"  # The verbosity level of IRACE
            )
            if self.solver_calls is not None:
                file.write(f"maxExperiments = {self.solver_calls}\n")
            elif self.max_time is not None:
                file.write(f"maxTime = {self.max_time}\n")
            if self.solver_calls is not None and self.max_time is not None:
                print("WARNING: Both solver calls and max time specified for "
                      "scenario. This is not supported by IRACE, defaulting to "
                      "solver calls.")
            elif self.solver_calls is None and self.max_time is None:
                print("WARNING: Neither solver calls nor max time specified. "
                      "Either budget is required for the IRACE scenario.")
            if self.max_time is not None and self.budget_estimation is None:
                # Auto estimate the budget fraction
                if self.cutoff_time < self.max_time:
                    self.budget_estimation = self.cutoff_time / self.max_time
                    file.write(f"budgetEstimation = {self.budget_estimation}\n")
            if self.first_test is not None:
                file.write(f"firstTest = {self.first_test}\n")
            if self.mu is not None:
                file.write(f"mu = {self.mu}\n")
            if self.max_iterations is not None:
                file.write(f"nbIterations = {self.max_iterations}\n")
        print("Verifying contents of IRACE scenario file and testing solver call...")
        check_file = subprocess.run(
            [f"{IRACE.configurator_executable.absolute()}",
             "-s", f"{self.scenario_file_path.absolute()}", "--check"],
            capture_output=True)
        if check_file.returncode != 0:
            stdout_msg = "\n".join([
                line for line in check_file.stdout.decode().splitlines()
                if not line.startswith("#")])
            print("An error occurred in the IRACE scenario file:\n",
                  self.scenario_file_path.open("r").read(),
                  stdout_msg, "\n",
                  check_file.stderr.decode())
            return None
        print("IRACE scenario file is valid.")
        return self.scenario_file_path
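
    # Illustrative sketch of a generated scenario file (the paths, the PAR10
    # objective, the 60s cutoff, the PCS file name and the budget value are
    # placeholders; the targetCmdline entry is a single line in the actual file):
    #
    #   execDir = "/.../Output/IRACE/scenario_name"
    #   targetRunnerLauncher = "python3"
    #   targetRunner = "/.../Components/irace-v4.2.0/irace_target_algorithm.py"
    #   targetCmdline = "{targetRunner} /.../solver PAR10 60 {configurationID}
    #                    {instanceID} {seed} {instance} {targetRunnerArgs}"
    #   deterministic = 0
    #   parameterFile = "/.../solver/parameters.pcs"
    #   trainInstancesDir = "/.../Instances/instance_set"
    #   trainInstancesFile = "/.../instance_set.txt"
    #   debugLevel = 1
    #   maxExperiments = 500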

    def serialize(self: IRACEScenario) -> dict:
        """Serialize the IRACE scenario."""
        return {
            "number_of_runs": self.number_of_runs,
            "solver_calls": self.solver_calls,
            "max_time": self.max_time,
            "cutoff_time": self.cutoff_time,
            "budget_estimation": self.budget_estimation,
            "first_test": self.first_test,
            "mu": self.mu,
            "max_iterations": self.max_iterations,
        }

    @staticmethod
    def from_file(scenario_file: Path) -> IRACEScenario:
        """Reads scenario file and initialises IRACEScenario."""
        scenario_dict = {keyvalue[0]: keyvalue[1]
                         for keyvalue in (line.split(" = ", maxsplit=1)
                                          for line in scenario_file.open().readlines()
                                          if line.strip() != "")}
        _, solver_path, objective, cutoff, _, _, _, _, _ =\
            scenario_dict.pop("targetCmdline").split(" ")
        scenario_dict["sparkle_objectives"] = [resolve_objective(objective)]
        scenario_dict["cutoff_time"] = int(cutoff)
        scenario_dict["parent_directory"] = scenario_file.parent.parent
        scenario_dict["number_of_runs"] =\
            len([p for p in (scenario_file.parent / "results").iterdir()])
        scenario_dict.pop("targetRunner")
        scenario_dict.pop("execDir")
        scenario_dict.pop("targetRunnerLauncher")
        scenario_dict.pop("deterministic")
        scenario_dict.pop("parameterFile")
        scenario_dict.pop("debugLevel")
        instance_set_path =\
            Path(scenario_dict.pop("trainInstancesDir").strip().strip('"'))
        instance_set = Instance_Set(instance_set_path)
        solver = Solver(Path(solver_path.strip()))
        scenario_dict.pop("trainInstancesFile")
        # Replace IRACE option names with the scenario's keyword argument names
        if "budgetEstimation" in scenario_dict:
            scenario_dict["budget_estimation"] =\
                float(scenario_dict.pop("budgetEstimation"))
        if "firstTest" in scenario_dict:
            scenario_dict["first_test"] = int(scenario_dict.pop("firstTest"))
        if "mu" in scenario_dict:
            scenario_dict["mu"] = int(scenario_dict.pop("mu"))
        if "nbIterations" in scenario_dict:
            scenario_dict["max_iterations"] = int(scenario_dict.pop("nbIterations"))
        if "maxExperiments" in scenario_dict:
            scenario_dict["solver_calls"] = int(scenario_dict.pop("maxExperiments"))
        if "maxTime" in scenario_dict:
            scenario_dict["max_time"] = int(scenario_dict.pop("maxTime"))

        return IRACEScenario(solver, instance_set, **scenario_dict)
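
# Illustrative round-trip sketch (assumption, not part of the original module):
# once `scenario.create_scenario()` has written the scenario file, an equivalent
# scenario object can be rebuilt from disk:
#
#   restored = IRACEScenario.from_file(scenario.scenario_file_path)
#   assert restored.cutoff_time == scenario.cutoff_time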