Coverage for sparkle/configurator/implementations/irace.py: 78%
178 statements
« prev ^ index » next coverage.py v7.6.10, created at 2025-01-07 15:22 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2025-01-07 15:22 +0000
1"""Configurator classes to implement IRACE in Sparkle."""
2from __future__ import annotations
3import shutil
4import subprocess
5from pathlib import Path
7from sparkle.configurator.configurator import Configurator, ConfigurationScenario
8from sparkle.solver import Solver
9from sparkle.structures import PerformanceDataFrame, FeatureDataFrame
10from sparkle.instance import InstanceSet, Instance_Set
11from sparkle.types import SparkleObjective, resolve_objective
13import runrunner as rrr
14from runrunner import Runner, Run
class IRACE(Configurator):
    """Class for IRACE configurator.

    Wraps the irace v3.5 distribution shipped in the Components directory:
    it builds the command lines for the configuration runs, submits them
    through RunRunner and extracts the final elite configuration from the
    ``.Rdata`` log that irace produces.
    """
    # Locations of the bundled irace distribution and its helper files
    configurator_path = Path(__file__).parent.parent.parent.resolve() /\
        "Components/irace-v3.5"
    configurator_package = configurator_path / "irace_3.5.tar.gz"
    r6_dependency_package = configurator_path / "R6_2.5.1.tar.gz"
    configurator_executable = configurator_path / "irace" / "bin" / "irace"
    configurator_ablation_executable = configurator_path / "irace" / "bin" / "ablation"
    configurator_target = configurator_path / "irace_target_algorithm.py"

    version = "3.5"
    full_name = "Iterated Racing for Automatic Algorithm Configuration"

    def __init__(self: IRACE,
                 output_path: Path,
                 base_dir: Path,
                 ) -> None:
        """Initialize IRACE configurator.

        Args:
            output_path: Parent directory for the output; a subdirectory named
                after this class is created inside it and used as output path.
            base_dir: Passed on unchanged to the Configurator base class.
        """
        output_path = output_path / IRACE.__name__
        output_path.mkdir(parents=True, exist_ok=True)
        super().__init__(output_path=output_path,
                         base_dir=base_dir,
                         tmp_path=output_path / "tmp",
                         multi_objective_support=False)

    @property
    def name(self: IRACE) -> str:
        """Returns the name of the configurator."""
        return IRACE.__name__

    @staticmethod
    def scenario_class() -> type[ConfigurationScenario]:
        """Returns the IRACE scenario class."""
        return IRACEScenario

    def configure(self: IRACE,
                  scenario: ConfigurationScenario,
                  data_target: PerformanceDataFrame,
                  validate_after: bool = True,
                  sbatch_options: list[str] = None,
                  num_parallel_jobs: int = None,
                  base_dir: Path = None,
                  run_on: Runner = Runner.SLURM) -> list[Run]:
        """Start configuration job.

        Args:
            scenario: ConfigurationScenario to execute.
            data_target: PerformanceDataFrame where to store the found configurations
            validate_after: Whether to validate the configuration on the training set
                afterwards or not.
            sbatch_options: List of slurm batch options to use.
                Defaults to an empty list.
            num_parallel_jobs: The maximum number of jobs to run in parallel
            base_dir: The base_dir of RunRunner where the sbatch scripts will be placed
            run_on: On which platform to run the jobs. Default: Slurm.

        Returns:
            A list with the configuration job and, if validate_after is set,
            the validation job that depends on it.
        """
        # A fresh list per call: a list literal as default would be shared
        # between all calls of this method.
        if sbatch_options is None:
            sbatch_options = []
        # NOTE(review): num_parallel_jobs is accepted but not forwarded to
        # RunRunner here - confirm the intended keyword before wiring it in.

        scenario.create_scenario()
        output_csv = scenario.validation / "configurations.csv"
        output_csv.parent.mkdir(exist_ok=True, parents=True)

        # Create command to call IRACE. Create plural based on number of runs var
        # We set the seed over the last n run ids in the dataframe
        first_run = data_target.num_runs - scenario.number_of_runs
        seeds = data_target.run_ids[first_run:]
        results_dir = scenario.results_directory.absolute()
        cmds = []
        for seed in seeds:
            output_file = results_dir / f"output_{seed}.Rdata"
            cmds.append(
                f"python3 {Configurator.configurator_cli_path.absolute()} "
                f"{IRACE.__name__} {output_file} {data_target.csv_filepath} "
                f"{scenario.scenario_file_path} {seed} "
                f"{IRACE.configurator_executable.absolute()} "
                f"--scenario {scenario.scenario_file_path} "
                f"--log-file {output_file} "
                f"--seed {seed}")
        runs = [rrr.add_to_queue(
            runner=run_on,
            cmd=cmds,
            base_dir=base_dir,
            name=f"{self.name}: {scenario.solver.name} on {scenario.instance_set.name}",
            sbatch_options=sbatch_options,
        )]

        if validate_after:
            # TODO: Array job specific dependency, requires RunRunner update
            validate = scenario.solver.run_performance_dataframe(
                scenario.instance_set,
                run_ids=seeds,
                performance_dataframe=data_target,
                cutoff_time=scenario.cutoff_time,
                run_on=run_on,
                sbatch_options=sbatch_options,
                log_dir=scenario.validation,
                base_dir=base_dir,
                dependencies=runs,
            )
            runs.append(validate)

        if run_on == Runner.LOCAL:
            # Local runs are waited for before handing the run objects back
            for run in runs:
                run.wait()

        return runs

    @staticmethod
    def _r_table_to_config_str(r_table: str) -> str:
        """Convert the printed R table of one configuration to a CLI string.

        The table is expected to consist of alternating header and value
        lines; a wide table is printed as several such header/value pairs.
        The chunks are joined with a space so that the last value of one
        chunk can never fuse with the first value of the next chunk.

        Args:
            r_table: Standard output of the Rscript call printing the table.

        Returns:
            The configuration as ``--parameter value `` pairs. The ID column,
            the ``.PARENT.`` column and NA values are left out.
        """
        header_chunks = []
        value_chunks = []
        for index, line in enumerate(r_table.splitlines()):
            if index % 2 == 0:  # Even lines are headers
                header_chunks.append(line)
            else:  # Odd lines are parameter values
                # First element of every value line is the row name
                value_chunks.append(" ".join(line.split(" ")[1:]))
        # First item of the joined header/values is the ID column
        parameters = " ".join(header_chunks).split()[1:]
        values = " ".join(value_chunks).split()[1:]
        return "".join(
            f"--{parameter} {value} "
            for parameter, value in zip(parameters, values)
            if parameter != ".PARENT." and value not in ("NA", "<NA>"))

    @staticmethod
    def organise_output(output_source: Path,
                        output_target: Path,
                        scenario: IRACEScenario,
                        run_id: int) -> None | dict:
        """Method to restructure and clean up after a single configurator call.

        Args:
            output_source: The ``.Rdata`` log file written by irace.
            output_target: CSV file of the PerformanceDataFrame to write the
                configuration to. If None or not existing, nothing is written.
            scenario: The scenario that was configured.
            run_id: The run id under which the configuration is stored.

        Returns:
            The configuration dictionary if there is no output target to write
            to, otherwise None.

        Raises:
            RuntimeError: If Rscript fails or prints no configuration.
        """
        from filelock import FileLock
        # Let R load the irace log and print the elite of the last iteration
        get_config = subprocess.run(
            ["Rscript", "-e",
             'library("irace"); '
             f'load("{output_source}"); '
             "last <- length(iraceResults$iterationElites); "
             "id <- iraceResults$iterationElites[last]; "
             "print(getConfigurationById(iraceResults, ids = id))"],
            capture_output=True)
        r_table = get_config.stdout.decode()
        if get_config.returncode != 0 or r_table.strip() == "":
            raise RuntimeError("Failed to get configuration from IRACE file "
                               f"{output_source}:\n"
                               f"{r_table}\n"
                               f"{get_config.stderr.decode()}")

        configuration = Solver.config_str_to_dict(
            IRACE._r_table_to_config_str(r_table))
        if output_target is None or not output_target.exists():
            return configuration

        time_stamp = scenario.scenario_file_path.stat().st_mtime
        configuration["configuration_id"] =\
            f"{IRACE.__name__}_{time_stamp}_{run_id}"
        instance_names = scenario.instance_set.instance_names
        # The lock file guards the read-modify-write cycle on the CSV
        lock = FileLock(f"{output_target}.lock")
        with lock.acquire(timeout=60):
            performance_data = PerformanceDataFrame(output_target)
            # Resolve absolute path to Solver column
            solver = [s for s in performance_data.solvers
                      if Path(s).name == scenario.solver.name][0]
            # For some reason the instance paths in the instance set are absolute
            instances = [instance for instance in performance_data.instances
                         if Path(instance).name in instance_names]
            # We don't set the seed in the dataframe, as that should be part of the conf
            performance_data.set_value(
                value=[str(configuration)],
                solver=solver,
                instance=instances,
                objective=None,
                run=run_id,
                solver_fields=[PerformanceDataFrame.column_configuration]
            )
            performance_data.save_csv()

    def get_status_from_logs(self: IRACE) -> None:
        """Method to scan the log files of the configurator for warnings."""
        raise NotImplementedError
class IRACEScenario(ConfigurationScenario):
    """Class for IRACE scenario.

    Holds the settings of one irace configuration scenario and can write
    them to, and read them back from, an irace scenario file.
    """

    def __init__(self: IRACEScenario,
                 solver: Solver,
                 instance_set: InstanceSet,
                 sparkle_objectives: list[SparkleObjective],
                 parent_directory: Path,
                 number_of_runs: int = None, solver_calls: int = None,
                 cutoff_time: int = None,
                 max_time: int = None,
                 budget_estimation: float = None,
                 first_test: int = None,
                 mu: int = None,
                 max_iterations: int = None,
                 feature_data: FeatureDataFrame = None,
                 )\
            -> None:
        """Initialize scenario paths and names.

        Args:
            solver: Solver that should be configured.
            instance_set: Instances object for the scenario.
            sparkle_objectives: SparkleObjectives used for each run of the configuration.
                Will be simplified to the first objective.
            parent_directory: Path where the scenario files will be placed.
            number_of_runs: The number of configurator runs to perform
                for configuring the solver.
            solver_calls: The number of times the solver is called for each
                configuration run. [MaxExperiments]
            cutoff_time: The maximum time allowed for each individual run during
                configuration.
            max_time: The time budget (CPU) allocated for the sum of solver calls
                done by the configurator in seconds. [MaxTime]
            budget_estimation: Fraction (smaller than 1) of the budget used to estimate
                the mean computation time of a configuration. Only used when maxTime > 0.
                Default: Computed as cutoff_time / max_time. [BudgetEstimation]
            first_test: Specifies how many instances are evaluated before the first
                elimination test. IRACE Default: 5. [firstTest]
            mu: Parameter used to define the number of configurations sampled and
                evaluated at each iteration. IRACE Default: 5. [mu]
            max_iterations: Maximum number of iterations to be executed. Each iteration
                involves the generation of new configurations and the use of racing to
                select the best configurations. By default (with 0), irace calculates a
                minimum number of iterations as N^iter = ⌊2 + log2 N param⌋, where
                N^param is the number of non-fixed parameters to be tuned.
                Setting this parameter may make irace stop sooner than it should without
                using all the available budget. We recommend to use the default value.
            feature_data: FeatureDataFrame object with the feature data.
                Currently not supported by IRACE.
        """
        # Other possible arguments that are not added yet to Sparkle:
        #   --test-num-elites: Number of elite configurations returned by irace
        #       that will be tested if test instances are provided. Default: 1.
        #   --test-iteration-elites: Enable/disable testing the elite
        #       configurations found at each iteration. Default: 0.
        #   --test-type: Statistical test used for elimination. The default
        #       value selects t-test if capping is enabled or F-test, otherwise.
        #       Valid values are: F-test (Friedman test), t-test (pairwise
        #       t-tests with no correction), t-test-bonferroni (t-test with
        #       Bonferroni's correction for multiple comparisons), t-test-holm
        #       (t-test with Holm's correction for multiple comparisons).
        #   --each-test: Number of instances evaluated between elimination
        #       tests. Default: 1.
        #   --load-balancing: Enable/disable load-balancing when executing
        #       experiments in parallel. Load-balancing makes better use of
        #       computing resources, but increases communication overhead. If
        #       this overhead is large, disabling load-balancing may be faster.
        #       Default: 1.
        #   --mpi: Enable/disable MPI. Use Rmpi to execute targetRunner in
        #       parallel (parameter parallel is the number of slaves).
        #       Default: 0.
        #   --batchmode: Specify how irace waits for jobs to finish when
        #       targetRunner submits jobs to a batch cluster: sge, pbs, torque,
        #       slurm or htcondor. targetRunner must submit jobs to the cluster
        #       using, for example, qsub. Default: 0.
        #   --digits: Maximum number of decimal places that are significant for
        #       numerical (real) parameters. Default: 4.
        #   --soft-restart: Enable/disable the soft restart strategy that avoids
        #       premature convergence of the probabilistic model. Default: 1.
        #   --soft-restart-threshold: Soft restart threshold value for numerical
        #       parameters. If NA, NULL or "", it is computed as 10^-digits.
        #   -e,--elitist: Enable/disable elitist irace. Default: 1.
        #   --elitist-new-instances: Number of instances added to the execution
        #       list before previous instances in elitist irace. Default: 1.
        #   --elitist-limit: In elitist irace, maximum number per race of
        #       elimination tests that do not eliminate a configuration.
        #       Use 0 for no limit. Default: 2.
        #   --capping: Enable the use of adaptive capping, a technique designed
        #       for minimizing the computation time of configurations. This is
        #       only available when elitist is active. Default: 0.
        #   --capping-type: Measure used to obtain the execution bound from the
        #       performance of the elite configurations: median, mean, worst,
        #       best. Default: median.
        #   --bound-type: Method to calculate the mean performance of elite
        #       configurations: candidate or instance. Default: candidate.
        #   --bound-max: Maximum execution bound for targetRunner. It must be
        #       specified when capping is enabled. Default: 0.
        #   --bound-digits: Precision used for calculating the execution time.
        #       It must be specified when capping is enabled. Default: 0.
        #   --bound-par: Penalization constant for timed out executions
        #       (executions that reach boundMax execution time). Default: 1.
        #   --bound-as-timeout: Replace the configuration cost of bounded
        #       executions with boundMax. Default: 1.
        #   --postselection: Percentage of the configuration budget used to
        #       perform a postselection race of the best configurations of each
        #       iteration after the execution of irace. Default: 0.
        #   --iterations: Maximum number of iterations. Default: 0.
        #   --experiments-per-iteration: Number of runs of the target algorithm
        #       per iteration. Default: 0.
        #   --min-survival: Minimum number of configurations needed to continue
        #       the execution of each race (iteration). Default: 0.
        #   --num-configurations: Number of configurations to be sampled and
        #       evaluated at each iteration. Default: 0.
        #   --confidence: Confidence level for the elimination test.
        #       Default: 0.95.
        super().__init__(solver, instance_set, sparkle_objectives, parent_directory)
        self.solver = solver
        self.instance_set = instance_set
        if sparkle_objectives is not None:
            if len(sparkle_objectives) > 1:
                print("WARNING: IRACE does not have multi objective support. "
                      "Only the first objective will be used.")
            self.sparkle_objective = sparkle_objectives[0]
        else:
            self.sparkle_objective = None

        if feature_data is not None:
            print("WARNING: Instance features currently not supported by IRACE.")

        self.number_of_runs = number_of_runs
        # Non-positive (or missing) budgets are treated as "not specified"
        self.solver_calls = solver_calls if solver_calls and solver_calls > 0 else None
        self.max_time = max_time if max_time and max_time > 0 else None
        self.cutoff_time = cutoff_time
        self.budget_estimation = budget_estimation
        self.first_test = first_test
        self.mu = mu
        self.max_iterations = max_iterations

        # Pathing
        self.instance_file_path = self.directory / f"{self.instance_set.name}.txt"
        self.tmp = self.directory / "tmp"
        self.validation = self.directory / "validation"
        self.results_directory = self.directory / "results"

    def create_scenario(self: IRACEScenario) -> None:
        """Create scenario with solver and instances in the scenario directory.

        This prepares all the necessary subdirectories related to configuration.
        Removes any existing directory if it overlaps with the scenario name.
        Afterwards the instance list file and the scenario file are written.
        """
        # Set up directories
        shutil.rmtree(self.directory, ignore_errors=True)  # Clear directory
        self.directory.mkdir(exist_ok=True, parents=True)
        self.tmp.mkdir(exist_ok=True)
        self.validation.mkdir(exist_ok=True)
        self.results_directory.mkdir(exist_ok=True)

        # One instance file name per line; the scenario file refers to this
        # list via trainInstancesFile
        with self.instance_file_path.open("w+") as file:
            for instance_path in self.instance_set._instance_paths:
                file.write(f"{instance_path.name}\n")
        self.create_scenario_file()

    def create_scenario_file(self: IRACEScenario) -> Path:
        """Create a file from the IRACE scenario.

        After writing, the file is verified by calling the irace executable
        with ``--check``; the result of that check is printed.

        Returns:
            Path to the created file.
        """
        solver_path = self.solver.directory.absolute()
        pcs_path = self.solver.get_pcs_file(port_type="IRACE").absolute()
        forbidden_path = self.solver.get_forbidden(port_type="IRACE").absolute()
        with self.scenario_file_path.open("w") as file:
            file.write(
                f'execDir = "{self.directory.absolute()}"\n'
                'targetRunnerLauncher = "python3"\n'
                f'targetRunner = "{IRACE.configurator_target.absolute()}"\n'
                'targetRunnerLauncherArgs = "{targetRunner} '
                f"{solver_path} {self.sparkle_objective} {self.cutoff_time} "
                '{targetRunnerArgs}"\n'
                f"deterministic = {1 if self.solver.deterministic else 0}\n"
                f'parameterFile = "{pcs_path}"\n'
                f'forbiddenFile = "{forbidden_path}"\n'
                f'trainInstancesDir = "{self.instance_set.directory.absolute()}"\n'
                f'trainInstancesFile = "{self.instance_file_path.absolute()}"\n'
                "debugLevel = 1\n"  # The verbosity level of IRACE
            )
            # Exactly one budget type is written; solver calls take precedence
            if self.solver_calls is not None:
                file.write(f"maxExperiments = {self.solver_calls}\n")
            elif self.max_time is not None:
                file.write(f"maxTime = {self.max_time}\n")
            if self.solver_calls is not None and self.max_time is not None:
                print("WARNING: Both solver calls and max time specified for scenario. "
                      "This is not supported by IRACE, defaulting to solver calls.")
            elif self.solver_calls is None and self.max_time is None:
                print("WARNING: Neither solver calls nor max time specified. "
                      "Either budget is required for the IRACE scenario.")
            if self.max_time is not None:
                if (self.budget_estimation is None
                        and self.cutoff_time is not None
                        and self.cutoff_time < self.max_time):
                    # Auto Estimate
                    self.budget_estimation = self.cutoff_time / self.max_time
                # Written for both the estimated and a user supplied value, so
                # a scenario restored with from_file keeps its estimation
                if self.budget_estimation is not None:
                    file.write(f"budgetEstimation = {self.budget_estimation}\n")
            if self.first_test is not None:
                file.write(f"firstTest = {self.first_test}\n")
            if self.mu is not None:
                file.write(f"mu = {self.mu}\n")
            if self.max_iterations is not None:
                file.write(f"nbIterations = {self.max_iterations}\n")
        # The file is closed (and thereby flushed) before irace reads it
        print("Verifying contents of IRACE scenario file and testing solver call...")
        check_file = subprocess.run(
            [f"{IRACE.configurator_executable.absolute()}",
             "-s", f"{self.scenario_file_path.absolute()}", "--check"],
            capture_output=True)
        if check_file.returncode != 0:
            # Lines starting with '#' are left out of the reported output
            stdout_msg = "\n".join([
                line for line in check_file.stdout.decode().splitlines()
                if not line.startswith("#")])
            print("An error occured in the IRACE scenario file:\n",
                  self.scenario_file_path.read_text(),
                  stdout_msg, "\n",
                  check_file.stderr.decode())
        else:
            print("IRACE scenario file is valid.")
        return self.scenario_file_path

    def serialize(self: IRACEScenario) -> dict:
        """Serialize the IRACE scenario.

        Returns:
            Dictionary with the budget and tuning settings of the scenario.
        """
        return {
            "number_of_runs": self.number_of_runs,
            "solver_calls": self.solver_calls,
            "max_time": self.max_time,
            "cutoff_time": self.cutoff_time,
            "budget_estimation": self.budget_estimation,
            "first_test": self.first_test,
            "mu": self.mu,
            "max_iterations": self.max_iterations,
        }

    @staticmethod
    def from_file(scenario_file: Path) -> IRACEScenario:
        """Reads scenario file and initialises IRACEScenario.

        Args:
            scenario_file: Path to a scenario file as written by
                create_scenario_file.

        Returns:
            The scenario described by the file. The number of runs is taken
            from the number of entries in the results directory next to it.
        """
        scenario_dict = {}
        # read_text closes the file again, unlike a bare open().readlines()
        for line in scenario_file.read_text().splitlines():
            key, separator, value = line.partition(" = ")
            # Skip blank lines, comments and anything that is not 'key = value'
            if separator == "" or line.lstrip().startswith("#"):
                continue
            scenario_dict[key] = value
        # The launcher args hold: {targetRunner} solver objective cutoff {args}
        _, solver_path, objective, cutoff, _ =\
            scenario_dict.pop("targetRunnerLauncherArgs").split(" ")
        scenario_dict["sparkle_objectives"] = [resolve_objective(objective)]
        scenario_dict["cutoff_time"] = int(cutoff)
        scenario_dict["parent_directory"] = scenario_file.parent.parent
        scenario_dict["number_of_runs"] =\
            len([p for p in (scenario_file.parent / "results").iterdir()])
        # Entries that are derived from the solver/instances, not constructor input
        for derived_key in ("targetRunner", "execDir", "targetRunnerLauncher",
                            "deterministic", "parameterFile", "forbiddenFile",
                            "debugLevel"):
            scenario_dict.pop(derived_key)
        instance_set_path =\
            Path(scenario_dict.pop("trainInstancesDir").strip().strip('"'))
        instance_set = Instance_Set(instance_set_path)
        solver = Solver(Path(solver_path.strip()))
        scenario_dict.pop("trainInstancesFile")
        # Replace keys with scenario variable names
        if "budgetEstimation" in scenario_dict:
            scenario_dict["budget_estimation"] =\
                float(scenario_dict.pop("budgetEstimation"))
        if "firstTest" in scenario_dict:
            scenario_dict["first_test"] = int(scenario_dict.pop("firstTest"))
        if "mu" in scenario_dict:
            scenario_dict["mu"] = int(scenario_dict.pop("mu"))
        if "nbIterations" in scenario_dict:
            scenario_dict["max_iterations"] = int(scenario_dict.pop("nbIterations"))
        if "maxExperiments" in scenario_dict:
            scenario_dict["solver_calls"] = int(scenario_dict.pop("maxExperiments"))
        if "maxTime" in scenario_dict:
            scenario_dict["max_time"] = int(scenario_dict.pop("maxTime"))

        return IRACEScenario(solver, instance_set, **scenario_dict)