Coverage for sparkle/configurator/implementations/irace.py: 83%
164 statements
« prev ^ index » next coverage.py v7.6.4, created at 2024-11-05 14:48 +0000
« prev ^ index » next coverage.py v7.6.4, created at 2024-11-05 14:48 +0000
1"""Configurator classes to implement IRACE in Sparkle."""
2from __future__ import annotations
3import shutil
4import subprocess
5from pathlib import Path
7from sparkle.configurator.configurator import Configurator, ConfigurationScenario
8from sparkle.solver import Solver, Validator
9from sparkle.instance import InstanceSet, Instance_Set
10from sparkle.types import SparkleObjective, resolve_objective
12import runrunner as rrr
13from runrunner import Runner, Run
class IRACE(Configurator):
    """Class for IRACE configurator.

    Holds the locations of the bundled IRACE 3.5 files, queues configuration
    runs through RunRunner and converts the resulting ``.Rdata`` logs into
    command line style configurations.
    """
    # All IRACE related files live below <package root>/Components/irace-v3.5
    configurator_path = Path(__file__).parent.parent.parent.resolve() /\
        "Components/irace-v3.5"
    # Source archives of IRACE and its R6 dependency
    configurator_package = configurator_path / "irace_3.5.tar.gz"
    r6_dependency_package = configurator_path / "R6_2.5.1.tar.gz"
    # Executables of the IRACE installation
    configurator_executable = configurator_path / "irace" / "bin" / "irace"
    configurator_ablation_executable = configurator_path / "irace" / "bin" / "ablation"
    # Script handed to IRACE as its targetRunner
    configurator_target = configurator_path / "irace_target_algorithm.py"

    version = "3.5"
    full_name = "Iterated Racing for Automatic Algorithm Configuration"

    def __init__(self: IRACE,
                 output_path: Path,
                 base_dir: Path,
                 ) -> None:
        """Initialize IRACE configurator.

        Args:
            output_path: Directory in which a sub directory named after this
                class is created; that sub directory becomes the output path.
            base_dir: Forwarded unchanged to the Configurator base class.
        """
        output_path = output_path / IRACE.__name__
        output_path.mkdir(parents=True, exist_ok=True)
        validator = Validator(out_dir=output_path)
        super().__init__(validator=validator,
                         output_path=output_path,
                         base_dir=base_dir,
                         tmp_path=output_path / "tmp",
                         multi_objective_support=False)

    @property
    def name(self: IRACE) -> str:
        """Returns the name of the configurator."""
        return IRACE.__name__

    @property
    def scenario_class(self: IRACE) -> type[ConfigurationScenario]:
        """Returns the IRACE scenario class."""
        return IRACEScenario

    def configure(self: IRACE,
                  scenario: ConfigurationScenario,
                  validate_after: bool = True,
                  sbatch_options: list[str] = None,
                  num_parallel_jobs: int = None,
                  base_dir: Path = None,
                  run_on: Runner = Runner.SLURM) -> list[Run]:
        """Start configuration job.

        Args:
            scenario: ConfigurationScenario to execute.
            validate_after: Whether to validate the configuration on the training set
                afterwards or not.
            sbatch_options: List of slurm batch options to use.
                Defaults to no extra options.
            num_parallel_jobs: The maximum number of jobs to run in parallel
            base_dir: The base_dir of RunRunner where the sbatch scripts will be placed
            run_on: On which platform to run the jobs. Default: Slurm.

        Returns:
            A list of RunRunner Run objects: the configuration run, followed by
            the validation run when validate_after is set.
        """
        # A fresh list per call; a list literal as default would be shared by all calls
        sbatch_options = [] if sbatch_options is None else sbatch_options

        scenario.create_scenario()
        output_csv = scenario.validation / "configurations.csv"
        output_csv.parent.mkdir(exist_ok=True, parents=True)

        # Create command to call IRACE. Create plural based on number of runs var
        output_files = [
            scenario.results_directory.absolute() / f"output_{job_idx}.Rdata"
            for job_idx in range(scenario.number_of_runs)]
        # Every run writes its own log file and uses its run index as seed
        cmds = [f"python3 {Configurator.configurator_cli_path.absolute()} "
                f"{IRACE.__name__} {output_file} {output_csv.absolute()} "
                f"{IRACE.configurator_executable.absolute()} "
                f"--scenario {scenario.scenario_file_path.absolute()} "
                f"--log-file {output_file} "
                f"--seed {job_idx}"
                for job_idx, output_file in enumerate(output_files)]

        # Only forward the parallel job limit when the caller asked for one, so
        # the call is unchanged otherwise.
        # NOTE(review): assumes RunRunner names this keyword `parallel_jobs`.
        queue_options = {}
        if num_parallel_jobs is not None:
            queue_options["parallel_jobs"] = num_parallel_jobs
        runs = [rrr.add_to_queue(
            runner=run_on,
            cmd=cmds,
            base_dir=base_dir,
            name=f"IRACE: {scenario.solver.name} on {scenario.instance_set.name}",
            sbatch_options=sbatch_options,
            **queue_options,
        )]

        if validate_after:
            self.validator.out_dir = output_csv.parent
            self.validator.tmp_out_dir = base_dir
            validate_run = self.validator.validate(
                [scenario.solver] * scenario.number_of_runs,
                output_csv,
                [scenario.instance_set],
                [scenario.sparkle_objective],
                scenario.cutoff_time,
                subdir=Path(),
                dependency=runs,
                sbatch_options=sbatch_options,
                run_on=run_on)
            runs.append(validate_run)
        return runs

    @staticmethod
    def _r_table_to_configuration(r_table: str) -> str:
        """Translate a printed R configuration table into solver arguments.

        The table is read as alternating header and value lines; a wide table
        is wrapped by R into several such pairs. Tokens are collected per line
        so values of consecutive chunks can never be glued together.

        Args:
            r_table: Standard output of the R print call.

        Returns:
            The configuration as "--parameter value " pairs (trailing space kept).
        """
        lines = [line for line in r_table.splitlines() if line.strip() != ""]
        header, values = [], []
        for index, line in enumerate(lines):
            tokens = line.split()
            if index % 2 == 0:  # Even lines are headers
                header.extend(tokens)
            else:  # Odd lines are parameter values, first token is the row name
                values.extend(tokens[1:])
        # First remaining item of both the header and the values is the ID
        return "".join(
            f"--{parameter} {value} "
            for parameter, value in zip(header[1:], values[1:])
            if parameter != ".PARENT." and value not in ("NA", "<NA>"))

    @staticmethod
    def organise_output(output_source: Path, output_target: Path) -> None | str:
        """Method to restructure and clean up after a single configurator call.

        Args:
            output_source: IRACE log (.Rdata) file to read the result from.
            output_target: File to which the configuration is appended as one line.

        Raises:
            RuntimeError: If Rscript fails or does not print a configuration.
        """
        import fcntl
        # Let R print the elite configuration of the last iteration
        get_config = subprocess.run(
            ["Rscript", "-e",
             'library("irace"); '
             f'load("{output_source}"); '
             "last <- length(iraceResults$iterationElites); "
             "id <- iraceResults$iterationElites[last]; "
             "print(getConfigurationById(iraceResults, ids = id))"],
            capture_output=True)
        r_table = get_config.stdout.decode()
        if get_config.returncode != 0 or r_table.strip() == "":
            raise RuntimeError("Failed to get configuration from IRACE file "
                               f"{output_source}:\n"
                               f"{r_table}\n"
                               f"{get_config.stderr.decode()}")

        configuration = IRACE._r_table_to_configuration(r_table)

        with output_target.open("a") as fout:
            # Exclusive lock while appending; presumably several runs write to the
            # same target file. Closing the file releases the lock.
            fcntl.flock(fout.fileno(), fcntl.LOCK_EX)
            fout.write(configuration + "\n")

    def get_status_from_logs(self: IRACE) -> None:
        """Method to scan the log files of the configurator for warnings."""
        raise NotImplementedError
class IRACEScenario(ConfigurationScenario):
    """Class for IRACE scenario.

    Stores the budget and tuning options of one IRACE configuration scenario
    and can write them to, or read them back from, an IRACE scenario file.
    """

    def __init__(self: IRACEScenario,
                 solver: Solver,
                 instance_set: InstanceSet,
                 sparkle_objectives: list[SparkleObjective],
                 parent_directory: Path,
                 number_of_runs: int = None, solver_calls: int = None,
                 cutoff_time: int = None,
                 max_time: int = None,
                 budget_estimation: float = None,
                 first_test: int = None,
                 mu: int = None,
                 max_iterations: int = None,
                 )\
            -> None:
        """Initialize scenario paths and names.

        Args:
            solver: Solver that should be configured.
            instance_set: Instances object for the scenario.
            sparkle_objectives: SparkleObjectives used for each run of the configuration.
                Will be simplified to the first objective.
            parent_directory: Path where the scenario files will be placed.
            number_of_runs: The number of configurator runs to perform
                for configuring the solver.
            solver_calls: The number of times the solver is called for each
                configuration run. [MaxExperiments]
            cutoff_time: The maximum time allowed for each individual run during
                configuration.
            max_time: The time budget (CPU) allocated for the sum of solver calls
                done by the configurator in seconds. [MaxTime]
            budget_estimation: Fraction (smaller than 1) of the budget used to estimate
                the mean computation time of a configuration. Only used when maxTime > 0.
                Default: Computed as cutoff_time / max_time. [BudgetEstimation]
            first_test: Specifies how many instances are evaluated before the first
                elimination test. IRACE Default: 5. [firstTest]
            mu: Parameter used to define the number of configurations sampled and
                evaluated at each iteration. IRACE Default: 5. [mu]
            max_iterations: Maximum number of iterations to be executed. Each iteration
                involves the generation of new configurations and the use of racing to
                select the best configurations. By default (with 0), irace calculates a
                minimum number of iterations as N^iter = ⌊2 + log2 N param⌋, where
                N^param is the number of non-fixed parameters to be tuned.
                Setting this parameter may make irace stop sooner than it should without
                using all the available budget. We recommend to use the default value.
        """
        # Other possible arguments that are not added yet to Sparkle:
        # --test-num-elites
        #     Number of elite configurations returned by irace that will be
        #     tested if test instances are provided. Default: 1.
        # --test-iteration-elites
        #     Enable/disable testing the elite configurations found at each
        #     iteration. Default: 0.
        # --test-type
        #     Statistical test used for elimination. The default value selects
        #     t-test if capping is enabled or F-test, otherwise. Valid values
        #     are: F-test (Friedman test), t-test (pairwise t-tests with no
        #     correction), t-test-bonferroni (t-test with Bonferroni's
        #     correction for multiple comparisons), t-test-holm (t-test with
        #     Holm's correction for multiple comparisons).
        # --each-test
        #     Number of instances evaluated between elimination tests.
        #     Default: 1.
        # --load-balancing
        #     Enable/disable load-balancing when executing experiments in
        #     parallel. Load-balancing makes better use of computing resources,
        #     but increases communication overhead. If this overhead is large,
        #     disabling load-balancing may be faster. Default: 1.
        # --mpi
        #     Enable/disable MPI. Use Rmpi to execute targetRunner in parallel
        #     (parameter parallel is the number of slaves). Default: 0.
        # --batchmode
        #     Specify how irace waits for jobs to finish when targetRunner
        #     submits jobs to a batch cluster: sge, pbs, torque, slurm or
        #     htcondor. targetRunner must submit jobs to the cluster using, for
        #     example, qsub. Default: 0.
        # --digits
        #     Maximum number of decimal places that are significant for
        #     numerical (real) parameters. Default: 4.
        # --soft-restart
        #     Enable/disable the soft restart strategy that avoids premature
        #     convergence of the probabilistic model. Default: 1.
        # --soft-restart-threshold
        #     Soft restart threshold value for numerical parameters. If NA,
        #     NULL or "", it is computed as 10^-digits.
        # -e,--elitist
        #     Enable/disable elitist irace. Default: 1.
        # --elitist-new-instances
        #     Number of instances added to the execution list before previous
        #     instances in elitist irace. Default: 1.
        # --elitist-limit
        #     In elitist irace, maximum number per race of elimination tests
        #     that do not eliminate a configuration. Use 0 for no limit.
        #     Default: 2.
        # --capping
        #     Enable the use of adaptive capping, a technique designed for
        #     minimizing the computation time of configurations. This is only
        #     available when elitist is active. Default: 0.
        # --capping-type
        #     Measure used to obtain the execution bound from the performance
        #     of the elite configurations: median, mean, worst, best.
        #     Default: median.
        # --bound-type
        #     Method to calculate the mean performance of elite configurations:
        #     candidate or instance. Default: candidate.
        # --bound-max
        #     Maximum execution bound for targetRunner. It must be specified
        #     when capping is enabled. Default: 0.
        # --bound-digits
        #     Precision used for calculating the execution time. It must be
        #     specified when capping is enabled. Default: 0.
        # --bound-par
        #     Penalization constant for timed out executions (executions that
        #     reach boundMax execution time). Default: 1.
        # --bound-as-timeout
        #     Replace the configuration cost of bounded executions with
        #     boundMax. Default: 1.
        # --postselection
        #     Percentage of the configuration budget used to perform a
        #     postselection race of the best configurations of each iteration
        #     after the execution of irace. Default: 0.
        # --iterations
        #     Maximum number of iterations. Default: 0.
        # --experiments-per-iteration
        #     Number of runs of the target algorithm per iteration. Default: 0.
        # --min-survival
        #     Minimum number of configurations needed to continue the execution
        #     of each race (iteration). Default: 0.
        # --num-configurations
        #     Number of configurations to be sampled and evaluated at each
        #     iteration. Default: 0.
        # --confidence
        #     Confidence level for the elimination test. Default: 0.95.
        super().__init__(solver, instance_set, sparkle_objectives, parent_directory)
        self.solver = solver
        self.instance_set = instance_set
        if sparkle_objectives:
            if len(sparkle_objectives) > 1:
                print("WARNING: IRACE does not have multi objective support. "
                      "Only the first objective will be used.")
            self.sparkle_objective = sparkle_objectives[0]
        else:
            # No objective given (None or an empty list)
            self.sparkle_objective = None

        self.number_of_runs = number_of_runs
        # Non-positive budgets are treated the same as "not specified"
        self.solver_calls = solver_calls if solver_calls and solver_calls > 0 else None
        self.max_time = max_time if max_time and max_time > 0 else None
        self.cutoff_time = cutoff_time
        self.budget_estimation = budget_estimation
        self.first_test = first_test
        self.mu = mu
        self.max_iterations = max_iterations

        # Pathing
        self.instance_file_path = self.directory / f"{self.instance_set.name}.txt"
        self.tmp = self.directory / "tmp"
        self.validation = self.directory / "validation"
        self.results_directory = self.directory / "results"

    def create_scenario(self: IRACEScenario) -> None:
        """Create the scenario directory, instance list and scenario file.

        This prepares all the necessary subdirectories related to configuration.
        Removes any existing directory if it overlaps with the scenario name.
        """
        # Set up directories
        shutil.rmtree(self.directory, ignore_errors=True)  # Clear directory
        self.directory.mkdir(exist_ok=True, parents=True)
        self.tmp.mkdir(exist_ok=True)
        self.validation.mkdir(exist_ok=True)
        self.results_directory.mkdir(exist_ok=True)

        # One instance name per line; the scenario file refers to this list
        with self.instance_file_path.open("w+") as file:
            for instance_path in self.instance_set._instance_paths:
                file.write(f"{instance_path.name}\n")
        self.create_scenario_file()

    def create_scenario_file(self: IRACEScenario) -> Path:
        """Create a file from the IRACE scenario.

        After writing, the file is verified with the IRACE executable
        (``--check``) and the outcome is printed.

        Returns:
            Path to the created file.
        """
        solver_path = self.solver.directory.absolute()
        pcs_file = self.solver.get_pcs_file(port_type="IRACE").absolute()
        forbidden_file = self.solver.get_forbidden(port_type="IRACE").absolute()
        with self.scenario_file_path.open("w") as file:
            file.write(
                f'execDir = "{self.directory.absolute()}"\n'
                'targetRunnerLauncher = "python3"\n'
                f'targetRunner = "{IRACE.configurator_target.absolute()}"\n'
                # {targetRunner} and {targetRunnerArgs} are written literally
                'targetRunnerLauncherArgs = "{targetRunner} '
                f"{solver_path} {self.sparkle_objective} {self.cutoff_time} "
                '{targetRunnerArgs}"\n'
                f"deterministic = {1 if self.solver.deterministic else 0}\n"
                f'parameterFile = "{pcs_file}"\n'
                f'forbiddenFile = "{forbidden_file}"\n'
                f'trainInstancesDir = "{self.instance_set.directory.absolute()}"\n'
                f'trainInstancesFile = "{self.instance_file_path.absolute()}"\n'
                "debugLevel = 1\n"  # The verbosity level of IRACE
            )
            # Only one budget type is written; solver calls take precedence
            if self.solver_calls is not None:
                file.write(f"maxExperiments = {self.solver_calls}\n")
            elif self.max_time is not None:
                file.write(f"maxTime = {self.max_time}\n")
            if self.solver_calls is not None and self.max_time is not None:
                print("WARNING: Both solver calls and max time specified for scenario. "
                      "This is not supported by IRACE, defaulting to solver calls.")
            elif self.solver_calls is None and self.max_time is None:
                print("WARNING: Neither solver calls nor max time specified. "
                      "Either budget is required for the IRACE scenario.")
            if self.max_time is not None:
                if (self.budget_estimation is None
                        and self.cutoff_time is not None
                        and self.cutoff_time < self.max_time):
                    # Auto Estimate
                    self.budget_estimation = self.cutoff_time / self.max_time
                # Write user supplied and estimated values alike, but never "None"
                if self.budget_estimation is not None:
                    file.write(f"budgetEstimation = {self.budget_estimation}\n")
            if self.first_test is not None:
                file.write(f"firstTest = {self.first_test}\n")
            if self.mu is not None:
                file.write(f"mu = {self.mu}\n")
            if self.max_iterations is not None:
                file.write(f"nbIterations = {self.max_iterations}\n")

        # The file is closed (and thus flushed) here, before IRACE reads it
        print("Verifying contents of IRACE scenario file and testing solver call...")
        check_file = subprocess.run(
            [f"{IRACE.configurator_executable.absolute()}",
             "-s", f"{self.scenario_file_path.absolute()}", "--check"],
            capture_output=True)
        if check_file.returncode != 0:
            # Lines starting with '#' are left out of the report
            stdout_msg = "\n".join([
                line for line in check_file.stdout.decode().splitlines()
                if not line.startswith("#")])
            print("An error occured in the IRACE scenario file:\n",
                  self.scenario_file_path.read_text(),
                  stdout_msg, "\n",
                  check_file.stderr.decode())
        else:
            print("IRACE scenario file is valid.")
        return self.scenario_file_path

    def serialize(self: IRACEScenario) -> dict:
        """Serialize the IRACE scenario."""
        return {
            "number_of_runs": self.number_of_runs,
            "solver_calls": self.solver_calls,
            "max_time": self.max_time,
            "cutoff_time": self.cutoff_time,
            "budget_estimation": self.budget_estimation,
            "first_test": self.first_test,
            "mu": self.mu,
            "max_iterations": self.max_iterations,
        }

    @staticmethod
    def from_file(scenario_file: Path) -> IRACEScenario:
        """Reads scenario file and initalises IRACEScenario.

        Args:
            scenario_file: Scenario file as written by create_scenario_file.

        Returns:
            The scenario described by the file. The number of runs is taken
            from the number of entries in the "results" directory next to it.
        """
        scenario_dict = {}
        with scenario_file.open() as file:
            for line in file:
                key, separator, value = line.partition(" = ")
                if separator == "":
                    continue  # Blank line or line without an assignment
                scenario_dict[key] = value.strip()

        # Layout: "{targetRunner} <solver> <objective> <cutoff> {targetRunnerArgs}"
        _, solver_path, objective, cutoff, _ =\
            scenario_dict.pop("targetRunnerLauncherArgs").split(" ")
        scenario_dict["sparkle_objectives"] = [resolve_objective(objective)]
        scenario_dict["cutoff_time"] = int(cutoff)
        scenario_dict["parent_directory"] = scenario_file.parent.parent
        scenario_dict["number_of_runs"] =\
            len(list((scenario_file.parent / "results").iterdir()))

        instance_set_path = Path(scenario_dict.pop("trainInstancesDir").strip('"'))
        instance_set = Instance_Set(instance_set_path)
        solver = Solver(Path(solver_path.strip()))

        # Entries that do not map to a constructor argument
        for key in ("targetRunner", "execDir", "targetRunnerLauncher",
                    "deterministic", "parameterFile", "forbiddenFile",
                    "debugLevel", "trainInstancesFile"):
            scenario_dict.pop(key, None)

        # Replace keys with scenario variable names
        renames = {
            "budgetEstimation": ("budget_estimation", float),
            "firstTest": ("first_test", int),
            "mu": ("mu", int),
            "nbIterations": ("max_iterations", int),
            "maxExperiments": ("solver_calls", int),
            "maxTime": ("max_time", int),
        }
        for irace_key, (argument, cast) in renames.items():
            if irace_key in scenario_dict:
                scenario_dict[argument] = cast(scenario_dict.pop(irace_key))

        return IRACEScenario(solver, instance_set, **scenario_dict)