Coverage for sparkle/configurator/implementations/irace.py: 48%
178 statements
« prev ^ index » next coverage.py v7.8.0, created at 2025-04-03 10:42 +0000
1"""Configurator classes to implement IRACE in Sparkle."""
2from __future__ import annotations
3import shutil
4import subprocess
5from pathlib import Path
7from sparkle.configurator.configurator import Configurator, ConfigurationScenario
8from sparkle.solver import Solver
9from sparkle.structures import PerformanceDataFrame, FeatureDataFrame
10from sparkle.instance import InstanceSet, Instance_Set
11from sparkle.types import SparkleObjective, resolve_objective
13import runrunner as rrr
14from runrunner import Runner, Run
class IRACE(Configurator):
    """Class for IRACE configurator.

    Builds the command lines that run irace through the Sparkle configurator
    CLI, queues them with RunRunner, and extracts the resulting configuration
    from the irace ``.Rdata`` log files.
    """

    # Location of the bundled irace component, three directories above this file.
    configurator_path = (Path(__file__).parent.parent.parent.resolve()
                         / "Components/irace-v4.2.0")
    configurator_package = configurator_path / "irace_4.2.0.tar"
    # NOTE: There are possible dependencies that we do not install here.
    # TODO: Determine if we should add them or not.
    package_dependencies = ["codetools_0.2-20.tar", "data.table_1.16.4.tar",
                            "matrixStats_1.5.0.tar", "spacefillr_0.3.3.tar"]
    configurator_executable = configurator_path / "irace" / "bin" / "irace"
    configurator_ablation_executable = configurator_path / "irace" / "bin" / "ablation"
    configurator_target = configurator_path / "irace_target_algorithm.py"

    # NOTE(review): the bundled package above is irace 4.2.0 while this value
    # still reads "3.5" -- confirm which value consumers of `version` expect.
    version = "3.5"
    full_name = "Iterated Racing for Automatic Algorithm Configuration"

    def __init__(self: IRACE,
                 output_path: Path,
                 base_dir: Path,
                 ) -> None:
        """Initialize IRACE configurator.

        Args:
            output_path: Parent directory for the configurator output. A
                subdirectory named after this class is created inside it.
            base_dir: Forwarded unchanged to the Configurator base class.
        """
        output_path = output_path / IRACE.__name__
        output_path.mkdir(parents=True, exist_ok=True)
        super().__init__(output_path=output_path,
                         base_dir=base_dir,
                         tmp_path=output_path / "tmp",
                         multi_objective_support=False)

    @property
    def name(self: IRACE) -> str:
        """Returns the name of the configurator."""
        return IRACE.__name__

    @staticmethod
    def scenario_class() -> type[ConfigurationScenario]:
        """Returns the IRACE scenario class (the class itself, not an instance)."""
        return IRACEScenario

    def configure(self: IRACE,
                  scenario: ConfigurationScenario,
                  data_target: PerformanceDataFrame,
                  validate_after: bool = True,
                  sbatch_options: list[str] = None,
                  slurm_prepend: str | list[str] | Path = None,
                  num_parallel_jobs: int = None,
                  base_dir: Path = None,
                  run_on: Runner = Runner.SLURM) -> list[Run]:
        """Start configuration job.

        Args:
            scenario: ConfigurationScenario to execute.
            data_target: PerformanceDataFrame where to store the found configurations
            validate_after: Whether to validate the configuration on the training set
                afterwards or not.
            sbatch_options: List of slurm batch options to use.
                Defaults to an empty list.
            slurm_prepend: Slurm script to prepend to the sbatch
            num_parallel_jobs: The maximum number of jobs to run in parallel
            base_dir: The base_dir of RunRunner where the sbatch scripts will be placed
            run_on: On which platform to run the jobs. Default: Slurm.

        Returns:
            A list of RunRunner Run objects: the configuration run, followed by
            the validation run when ``validate_after`` is set.
        """
        # Build a fresh list per call instead of sharing one mutable default.
        sbatch_options = [] if sbatch_options is None else sbatch_options
        scenario.create_scenario()
        scenario.validation.mkdir(exist_ok=True, parents=True)

        # Create command to call IRACE. Create plural based on number of runs var
        # We set the seed over the last n run ids in the dataframe
        first_run = data_target.num_runs - scenario.number_of_runs
        seeds = data_target.run_ids[first_run:]
        results_dir = scenario.results_directory.absolute()
        cmds = []
        for seed in seeds:
            # One irace log file per seed; organise_output reads it afterwards.
            output_file = results_dir / f"output_{seed}.Rdata"
            cmds.append(
                f"python3 {Configurator.configurator_cli_path.absolute()} "
                f"{IRACE.__name__} {output_file} {data_target.csv_filepath} "
                f"{scenario.scenario_file_path} {seed} "
                f"{IRACE.configurator_executable.absolute()} "
                f"--scenario {scenario.scenario_file_path} "
                f"--log-file {output_file} "
                f"--seed {seed}")
        runs = [rrr.add_to_queue(
            runner=run_on,
            cmd=cmds,
            base_dir=base_dir,
            name=f"{self.name}: {scenario.solver.name} on {scenario.instance_set.name}",
            parallel_jobs=num_parallel_jobs,
            sbatch_options=sbatch_options,
            prepend=slurm_prepend,
        )]

        if validate_after:
            # Validation depends on the configuration runs queued above.
            validate = scenario.solver.run_performance_dataframe(
                scenario.instance_set,
                run_ids=seeds,
                performance_dataframe=data_target,
                cutoff_time=scenario.cutoff_time,
                run_on=run_on,
                sbatch_options=sbatch_options,
                log_dir=scenario.validation,
                base_dir=base_dir,
                dependencies=runs,
                slurm_prepend=slurm_prepend
            )
            runs.append(validate)

        if run_on == Runner.LOCAL:
            for run in runs:
                run.wait()

        return runs

    @staticmethod
    def organise_output(output_source: Path,
                        output_target: Path,
                        scenario: IRACEScenario,
                        run_id: int) -> None | dict:
        """Method to restructure and clean up after a single configurator call.

        Args:
            output_source: The irace ``.Rdata`` log file to read the result from.
            output_target: CSV file of the PerformanceDataFrame to update. When
                None or non-existent, the configuration is returned instead.
            scenario: The scenario that produced ``output_source``.
            run_id: Run id under which the configuration is stored.

        Returns:
            The configuration as a dict when no ``output_target`` is available,
            otherwise None (the configuration is written to ``output_target``).

        Raises:
            RuntimeError: If Rscript fails or prints nothing.
        """
        from filelock import FileLock
        # Ask R to print the elite configuration of the last iteration.
        get_config = subprocess.run(
            ["Rscript", "-e",
             'library("irace"); '
             f'load("{output_source}"); '
             "last <- length(iraceResults$iterationElites); "
             "id <- iraceResults$iterationElites[last]; "
             "print(getConfigurationById(iraceResults, ids = id))"],
            capture_output=True)
        r_table = get_config.stdout.decode()
        if get_config.returncode != 0 or r_table.strip() == "":
            raise RuntimeError("Failed to get configuration from IRACE file "
                               f"{output_source}:\n"
                               f"{get_config.stdout.decode()}\n"
                               f"{get_config.stderr.decode()}")

        # The printed table alternates header and value lines; join each kind
        # back together into one header row and one value row.
        header_text = ""
        content_text = ""
        for index, line in enumerate(r_table.splitlines()):
            if index % 2 == 0:  # Even lines are headers
                header_text += line
            else:  # Odd lines are parameter values
                # First element is the ID
                content_text += " ".join(line.split(" ")[1:])
        # First header item is the ID
        parameters = [x for x in header_text.split(" ") if x != ""][1:]
        values = [x for x in content_text.split(" ") if x != ""][1:]
        # Skip the bookkeeping column and parameters without a value.
        configuration = "".join(
            f"--{parameter} {value} "
            for parameter, value in zip(parameters, values)
            if parameter != ".PARENT." and value not in ("NA", "<NA>"))
        configuration = Solver.config_str_to_dict(configuration)
        if output_target is None or not output_target.exists():
            return configuration

        time_stamp = scenario.scenario_file_path.stat().st_mtime
        configuration["configuration_id"] =\
            f"{IRACE.__name__}_{time_stamp}_{run_id}"
        instance_names = scenario.instance_set.instance_names
        lock = FileLock(f"{output_target}.lock")
        with lock.acquire(timeout=60):
            performance_data = PerformanceDataFrame(output_target)
            # Resolve absolute path to Solver column
            solver = [s for s in performance_data.solvers
                      if Path(s).name == scenario.solver.name][0]
            # For some reason the instance paths in the instance set are absolute
            instances = [instance for instance in performance_data.instances
                         if Path(instance).name in instance_names]
            # We don't set the seed in the dataframe, as that should be part of the conf
            performance_data.set_value(
                value=[str(configuration)],
                solver=solver,
                instance=instances,
                objective=None,
                run=run_id,
                solver_fields=[PerformanceDataFrame.column_configuration]
            )
            performance_data.save_csv()

    def get_status_from_logs(self: IRACE) -> None:
        """Method to scan the log files of the configurator for warnings."""
        raise NotImplementedError
class IRACEScenario(ConfigurationScenario):
    """Class for IRACE scenario.

    Holds the irace budget and tuning options, writes them to an irace
    scenario file, and can rebuild itself from such a file.
    """

    def __init__(self: IRACEScenario,
                 solver: Solver,
                 instance_set: InstanceSet,
                 sparkle_objectives: list[SparkleObjective],
                 parent_directory: Path,
                 number_of_runs: int = None, solver_calls: int = None,
                 cutoff_time: int = None,
                 max_time: int = None,
                 budget_estimation: float = None,
                 first_test: int = None,
                 mu: int = None,
                 max_iterations: int = None,
                 feature_data: FeatureDataFrame = None,
                 )\
            -> None:
        """Initialize scenario paths and names.

        Args:
            solver: Solver that should be configured.
            instance_set: Instances object for the scenario.
            sparkle_objectives: SparkleObjectives used for each run of the configuration.
                Will be simplified to the first objective.
            parent_directory: Path where the scenario files will be placed.
            number_of_runs: The number of configurator runs to perform
                for configuring the solver.
            solver_calls: The number of times the solver is called for each
                configuration run. [MaxExperiments]
            cutoff_time: The maximum time allowed for each individual run during
                configuration.
            max_time: The time budget (CPU) allocated for the sum of solver calls
                done by the configurator in seconds. [MaxTime]
            budget_estimation: Fraction (smaller than 1) of the budget used to estimate
                the mean computation time of a configuration. Only used when maxTime > 0.
                Default: Computed as cutoff_time / max_time. [BudgetEstimation]
            first_test: Specifies how many instances are evaluated before the first
                elimination test. IRACE Default: 5. [firstTest]
            mu: Parameter used to define the number of configurations sampled and
                evaluated at each iteration. IRACE Default: 5. [mu]
            max_iterations: Maximum number of iterations to be executed. Each iteration
                involves the generation of new configurations and the use of racing to
                select the best configurations. By default (with 0), irace calculates a
                minimum number of iterations as N^iter = ⌊2 + log2 N param⌋, where
                N^param is the number of non-fixed parameters to be tuned.
                Setting this parameter may make irace stop sooner than it should without
                using all the available budget. We recommend to use the default value.
            feature_data: FeatureDataFrame object with the feature data.
                Currently not supported by IRACE.
        """
        # Other possible arguments that are not added yet to Sparkle:
        #   --test-num-elites
        #       Number of elite configurations returned by irace that will be
        #       tested if test instances are provided. Default: 1.
        #   --test-iteration-elites
        #       Enable/disable testing the elite configurations found at each
        #       iteration. Default: 0.
        #   --test-type
        #       Statistical test used for elimination. The default value selects
        #       t-test if capping is enabled or F-test, otherwise. Valid values
        #       are: F-test (Friedman test), t-test (pairwise t-tests with no
        #       correction), t-test-bonferroni (t-test with Bonferroni's
        #       correction for multiple comparisons), t-test-holm (t-test with
        #       Holm's correction for multiple comparisons).
        #   --each-test
        #       Number of instances evaluated between elimination tests.
        #       Default: 1.
        #   --load-balancing
        #       Enable/disable load-balancing when executing experiments in
        #       parallel. Load-balancing makes better use of computing
        #       resources, but increases communication overhead. If this
        #       overhead is large, disabling load-balancing may be faster.
        #       Default: 1.
        #   --mpi
        #       Enable/disable MPI. Use Rmpi to execute targetRunner in parallel
        #       (parameter parallel is the number of slaves). Default: 0.
        #   --batchmode
        #       Specify how irace waits for jobs to finish when targetRunner
        #       submits jobs to a batch cluster: sge, pbs, torque, slurm or
        #       htcondor. targetRunner must submit jobs to the cluster using,
        #       for example, qsub. Default: 0.
        #   --digits
        #       Maximum number of decimal places that are significant for
        #       numerical (real) parameters. Default: 4.
        #   --soft-restart
        #       Enable/disable the soft restart strategy that avoids premature
        #       convergence of the probabilistic model. Default: 1.
        #   --soft-restart-threshold
        #       Soft restart threshold value for numerical parameters. If NA,
        #       NULL or "", it is computed as 10^-digits.
        #   -e,--elitist
        #       Enable/disable elitist irace. Default: 1.
        #   --elitist-new-instances
        #       Number of instances added to the execution list before previous
        #       instances in elitist irace. Default: 1.
        #   --elitist-limit
        #       In elitist irace, maximum number per race of elimination tests
        #       that do not eliminate a configuration. Use 0 for no limit.
        #       Default: 2.
        #   --capping
        #       Enable the use of adaptive capping, a technique designed for
        #       minimizing the computation time of configurations. This is only
        #       available when elitist is active. Default: 0.
        #   --capping-type
        #       Measure used to obtain the execution bound from the performance
        #       of the elite configurations: median, mean, worst, best.
        #       Default: median.
        #   --bound-type
        #       Method to calculate the mean performance of elite
        #       configurations: candidate or instance. Default: candidate.
        #   --bound-max
        #       Maximum execution bound for targetRunner. It must be specified
        #       when capping is enabled. Default: 0.
        #   --bound-digits
        #       Precision used for calculating the execution time. It must be
        #       specified when capping is enabled. Default: 0.
        #   --bound-par
        #       Penalization constant for timed out executions (executions that
        #       reach boundMax execution time). Default: 1.
        #   --bound-as-timeout
        #       Replace the configuration cost of bounded executions with
        #       boundMax. Default: 1.
        #   --postselection
        #       Percentage of the configuration budget used to perform a
        #       postselection race of the best configurations of each iteration
        #       after the execution of irace. Default: 0.
        #   --iterations
        #       Maximum number of iterations. Default: 0.
        #   --experiments-per-iteration
        #       Number of runs of the target algorithm per iteration. Default: 0.
        #   --min-survival
        #       Minimum number of configurations needed to continue the
        #       execution of each race (iteration). Default: 0.
        #   --num-configurations
        #       Number of configurations to be sampled and evaluated at each
        #       iteration. Default: 0.
        #   --confidence
        #       Confidence level for the elimination test. Default: 0.95.
        super().__init__(solver, instance_set, sparkle_objectives, parent_directory)
        self.solver = solver
        self.instance_set = instance_set
        if sparkle_objectives is not None:
            self.sparkle_objective = sparkle_objectives[0]
        else:
            self.sparkle_objective = None

        if feature_data is not None:
            print("WARNING: Instance features currently not supported by IRACE.")

        self.number_of_runs = number_of_runs
        # Non-positive budgets are treated as "not specified".
        self.solver_calls = solver_calls if solver_calls and solver_calls > 0 else None
        self.max_time = max_time if max_time and max_time > 0 else None
        self.cutoff_time = cutoff_time
        self.budget_estimation = budget_estimation
        self.first_test = first_test
        self.mu = mu
        self.max_iterations = max_iterations

        # Pathing
        self.instance_file_path = self.directory / f"{self.instance_set.name}.txt"
        self.tmp = self.directory / "tmp"
        self.validation = self.directory / "validation"
        self.results_directory = self.directory / "results"

    def create_scenario(self: IRACEScenario) -> None:
        """Create scenario with solver and instances in the scenario directory.

        This prepares all the necessary subdirectories related to configuration.
        Removes any existing directory if it overlaps with the scenario name.
        Also writes the instance list file and the IRACE scenario file.
        """
        # Set up directories
        shutil.rmtree(self.directory, ignore_errors=True)  # Clear directory
        self.directory.mkdir(exist_ok=True, parents=True)
        for sub_directory in (self.tmp, self.validation, self.results_directory):
            sub_directory.mkdir(exist_ok=True)

        # One instance file name per line.
        with self.instance_file_path.open("w") as file:
            file.writelines(f"{instance_path.name}\n"
                            for instance_path in self.instance_set._instance_paths)
        self.create_scenario_file()

    def create_scenario_file(self: IRACEScenario) -> Path | None:
        """Create a file from the IRACE scenario.

        When only a time budget is given and no budget estimation was set, the
        estimation is derived as cutoff_time / max_time (if smaller than 1) and
        stored on the scenario. The written file is verified with the irace
        executable's ``--check`` option.

        Returns:
            Path to the created file, or None if the irace check failed.
        """
        from sparkle.tools.parameters import PCSConvention
        solver_path = self.solver.directory.absolute()
        pcs_path = self.solver.get_pcs_file(port_type=PCSConvention.IRACE).absolute()
        lines = [
            f'execDir = "{self.directory.absolute()}"\n',
            'targetRunnerLauncher = "python3"\n',
            f'targetRunner = "{IRACE.configurator_target.absolute()}"\n',
            # The braces in the plain (non-f) parts are irace placeholders.
            ('targetCmdline = "{targetRunner} '
             f"{solver_path} {self.sparkle_objective} {self.cutoff_time} "
             '{configurationID} {instanceID} {seed} {instance} {targetRunnerArgs}"\n'),
            f"deterministic = {1 if self.solver.deterministic else 0}\n",
            f'parameterFile = "{pcs_path}"\n',
            f'trainInstancesDir = "{self.instance_set.directory.absolute()}"\n',
            f'trainInstancesFile = "{self.instance_file_path.absolute()}"\n',
            "debugLevel = 1\n",  # The verbosity level of IRACE
        ]
        if self.solver_calls is not None:
            lines.append(f"maxExperiments = {self.solver_calls}\n")
        elif self.max_time is not None:
            lines.append(f"maxTime = {self.max_time}\n")
        if self.solver_calls is not None and self.max_time is not None:
            print("WARNING: Both solver calls and max time specified for scenario. "
                  "This is not supported by IRACE, defaulting to solver calls.")
        elif self.solver_calls is None and self.max_time is None:
            print("WARNING: Neither solver calls nor max time specified. "
                  "Either budget is required for the IRACE scenario.")
        if (self.budget_estimation is None
                and self.max_time is not None
                and self.cutoff_time is not None
                and self.cutoff_time < self.max_time):
            # Auto Estimate
            self.budget_estimation = self.cutoff_time / self.max_time
        # Written for user-supplied values too, not only the auto estimate.
        if self.budget_estimation is not None:
            lines.append(f"budgetEstimation = {self.budget_estimation}\n")
        if self.first_test is not None:
            lines.append(f"firstTest = {self.first_test}\n")
        if self.mu is not None:
            lines.append(f"mu = {self.mu}\n")
        if self.max_iterations is not None:
            lines.append(f"nbIterations = {self.max_iterations}\n")
        with self.scenario_file_path.open("w") as file:
            file.writelines(lines)

        print("Verifying contents of IRACE scenario file and testing solver call...")
        check_file = subprocess.run(
            [f"{IRACE.configurator_executable.absolute()}",
             "-s", f"{self.scenario_file_path.absolute()}", "--check"],
            capture_output=True)
        if check_file.returncode != 0:
            # Drop the '#'-prefixed lines from the irace output.
            stdout_msg = "\n".join([
                line for line in check_file.stdout.decode().splitlines()
                if not line.startswith("#")])
            print("An error occured in the IRACE scenario file:\n",
                  self.scenario_file_path.read_text(),
                  stdout_msg, "\n",
                  check_file.stderr.decode())
            return None
        print("IRACE scenario file is valid.")
        return self.scenario_file_path

    def serialize(self: IRACEScenario) -> dict:
        """Serialize the IRACE scenario."""
        return {
            "number_of_runs": self.number_of_runs,
            "solver_calls": self.solver_calls,
            "max_time": self.max_time,
            "cutoff_time": self.cutoff_time,
            "budget_estimation": self.budget_estimation,
            "first_test": self.first_test,
            "mu": self.mu,
            "max_iterations": self.max_iterations,
        }

    @staticmethod
    def from_file(scenario_file: Path) -> IRACEScenario:
        """Reads scenario file and initalises IRACEScenario.

        Args:
            scenario_file: Path to a scenario file as written by
                create_scenario_file.

        Returns:
            The IRACEScenario described by the file. The number of runs is
            taken from the number of entries in the sibling "results" directory.
        """
        # Parse the "key = value" lines; anything else is skipped.
        scenario_dict = {}
        for line in scenario_file.read_text().splitlines():
            if " = " not in line:
                continue
            key, value = line.split(" = ", maxsplit=1)
            scenario_dict[key] = value

        # targetCmdline: "{targetRunner} <solver> <objective> <cutoff> ..."
        _, solver_path, objective, cutoff, *_ =\
            scenario_dict.pop("targetCmdline").split(" ")
        scenario_dict["sparkle_objectives"] = [resolve_objective(objective)]
        scenario_dict["cutoff_time"] = int(cutoff)
        scenario_dict["parent_directory"] = scenario_file.parent.parent
        scenario_dict["number_of_runs"] =\
            sum(1 for _ in (scenario_file.parent / "results").iterdir())
        instance_set_path =\
            Path(scenario_dict.pop("trainInstancesDir").strip().strip('"'))
        instance_set = Instance_Set(instance_set_path)
        solver = Solver(Path(solver_path.strip()))
        # Entries that are derived from the solver/instance set, not arguments
        for unused_key in ("targetRunner", "execDir", "targetRunnerLauncher",
                           "deterministic", "parameterFile", "debugLevel",
                           "trainInstancesFile"):
            scenario_dict.pop(unused_key, None)
        # Replace keys with scenario variable names
        conversions = {
            "budgetEstimation": ("budget_estimation", float),
            "firstTest": ("first_test", int),
            "mu": ("mu", int),
            "nbIterations": ("max_iterations", int),
            "maxExperiments": ("solver_calls", int),
            "maxTime": ("max_time", int),
        }
        for irace_key, (argument, convert) in conversions.items():
            if irace_key in scenario_dict:
                scenario_dict[argument] = convert(scenario_dict.pop(irace_key))

        return IRACEScenario(solver, instance_set, **scenario_dict)