Coverage for sparkle/configurator/implementations/smac2.py: 80%
174 statements
« prev ^ index » next coverage.py v7.6.4, created at 2024-11-05 14:48 +0000
« prev ^ index » next coverage.py v7.6.4, created at 2024-11-05 14:48 +0000
1#!/usr/bin/env python3
2# -*- coding: UTF-8 -*-
3"""Configurator classes to implement SMAC2 in Sparkle."""
4from __future__ import annotations
5from pathlib import Path
6import fcntl
7import glob
8import shutil
10import pandas as pd
12import runrunner as rrr
13from runrunner import Runner, Run
15from sparkle.configurator.configurator import Configurator, ConfigurationScenario
16from sparkle.solver import Solver
17from sparkle.solver.validator import Validator
18from sparkle.instance import InstanceSet, Instance_Set
19from sparkle.types import SparkleObjective
class SMAC2(Configurator):
    """Class for SMAC2 (Java) configurator."""

    # Location of the bundled SMAC2 distribution, relative to this source file
    configurator_path = Path(__file__).parent.parent.parent.resolve() /\
        "Components/smac-v2.10.03-master-778"
    configurator_executable = configurator_path / "smac"
    configurator_target = configurator_path / "smac_target_algorithm.py"

    version = "2.10.03"
    full_name = "Sequential Model-based Algorithm Configuration"

    def __init__(self: SMAC2,
                 base_dir: Path,
                 output_path: Path) -> None:
        """Initialise the SMAC configurator, Java SMAC V2.10.03.

        A subdirectory named after this class is created inside output_path
        and used as output directory, validator directory and (with "tmp"
        appended) temporary directory.

        Args:
            base_dir: The path where the configurator will be executed in.
            output_path: The path where the output will be placed.
        """
        output_path = output_path / SMAC2.__name__
        output_path.mkdir(parents=True, exist_ok=True)
        super().__init__(
            validator=Validator(out_dir=output_path),
            output_path=output_path,
            base_dir=base_dir,
            tmp_path=output_path / "tmp",
            multi_objective_support=False)

    @property
    def name(self: SMAC2) -> str:
        """Returns the name of the configurator."""
        return SMAC2.__name__

    @property
    def scenario_class(self: Configurator) -> ConfigurationScenario:
        """Returns the SMAC2 scenario class."""
        return SMAC2Scenario

    def configure(self: Configurator,
                  scenario: ConfigurationScenario,
                  validate_after: bool = True,
                  sbatch_options: list[str] = None,
                  num_parallel_jobs: int = None,
                  base_dir: Path = None,
                  run_on: Runner = Runner.SLURM) -> list[Run]:
        """Start configuration job.

        Args:
            scenario: ConfigurationScenario object
            validate_after: Whether the Validator will be called after the configuration
            sbatch_options: List of slurm batch options to use. Defaults to no
                additional options.
            num_parallel_jobs: The maximum number of jobs to run parallel.
            base_dir: The path where the sbatch scripts will be created for Slurm.
            run_on: On which platform to run the jobs. Default: Slurm.

        Returns:
            A list of RunRunner Run objects: the configuration run, followed by
            the validation run if validate_after is set.

        Raises:
            RuntimeError: If no "java" executable can be found.
        """
        if shutil.which("java") is None:
            raise RuntimeError(
                "SMAC2 requires Java 1.8.0_402, but Java is not installed. "
                "Please ensure Java is installed and try again."
            )
        # A fresh list per call avoids sharing one mutable default between calls
        if sbatch_options is None:
            sbatch_options = []
        scenario.create_scenario()
        output_csv = scenario.validation / "configurations.csv"
        output_csv.parent.mkdir(exist_ok=True, parents=True)
        # One output file and one command per configurator run; seed == run index
        output = [f"{(scenario.results_directory).absolute()}/"
                  f"{scenario.name}_seed_{seed}_smac.txt"
                  for seed in range(scenario.number_of_runs)]
        cmds = [f"python3 {Configurator.configurator_cli_path.absolute()} "
                f"{SMAC2.__name__} {output[seed]} {output_csv.absolute()} "
                f"{SMAC2.configurator_executable.absolute()} "
                f"--scenario-file {scenario.scenario_file_path.absolute()} "
                f"--seed {seed} "
                f"--execdir {scenario.tmp.absolute()}"
                for seed in range(scenario.number_of_runs)]
        parallel_jobs = scenario.number_of_runs
        if num_parallel_jobs is not None:
            # num_parallel_jobs is an upper bound, so it may only lower the count
            parallel_jobs = min(num_parallel_jobs, scenario.number_of_runs)
        runs = [rrr.add_to_queue(
            runner=run_on,
            cmd=cmds,
            name=f"{self.name}: {scenario.solver.name} on {scenario.instance_set.name}",
            base_dir=base_dir,
            path=scenario.results_directory,
            output_path=output,
            parallel_jobs=parallel_jobs,
            sbatch_options=sbatch_options,
            srun_options=["-N1", "-n1"])]

        if validate_after:
            self.validator.out_dir = output_csv.parent
            self.validator.tmp_out_dir = base_dir
            # The validation is queued with the configuration run as dependency
            validate_run = self.validator.validate(
                [scenario.solver] * scenario.number_of_runs,
                output_csv,
                [scenario.instance_set],
                [scenario.sparkle_objective],
                scenario.cutoff_time,
                subdir=Path(),
                dependency=runs,
                sbatch_options=sbatch_options,
                run_on=run_on)
            runs.append(validate_run)

        if run_on == Runner.LOCAL:
            # Local runs are waited for before returning
            for run in runs:
                run.wait()
        return runs

    @staticmethod
    def organise_output(output_source: Path, output_target: Path = None) -> None | str:
        """Retrieves configurations from SMAC files and places them in output.

        Args:
            output_source: SMAC output file to search for target algorithm calls.
            output_target: File to append the configuration to. If None, the
                configuration is returned instead of written.

        Returns:
            The configuration string if output_target is None and a call was
            found, otherwise None.
        """
        call_key = SMAC2.configurator_target.name
        # Read everything up front so the source file is closed before writing
        with output_source.open("r") as source:
            lines = source.readlines()
        # Last line describing a call is the best found configuration
        for line in reversed(lines):
            if call_key in line:
                call_str = line.split(call_key, maxsplit=1)[1].strip()
                # The configuration is what remains after the first 7
                # space separated arguments of the call
                configuration = call_str.split(" ", 7)[-1]
                if output_target is None:
                    return configuration
                with output_target.open("a") as fout:
                    # Exclusive lock, released when the file is closed
                    fcntl.flock(fout.fileno(), fcntl.LOCK_EX)
                    fout.write(configuration + "\n")
                break
        return None

    @staticmethod
    def get_smac_run_obj(objective: SparkleObjective) -> str:
        """Return the SMAC run objective based on the Performance Measure.

        Args:
            objective: The objective of which the time attribute is checked.

        Returns:
            "RUNTIME" if objective.time is truthy, "QUALITY" otherwise.
        """
        if objective.time:
            return "RUNTIME"
        return "QUALITY"

    def get_status_from_logs(self: SMAC2) -> None:
        """Method to scan the log files of the configurator for warnings.

        Prints per scenario directory whether non-empty "log-warn*" files exist.
        Does nothing if the "scenarios" directory does not exist.
        """
        base_dir = self.output_path / "scenarios"
        if not base_dir.exists():
            return
        print(f"Checking the log files of configurator {type(self).__name__} for "
              "warnings...")
        scenarios = [f for f in base_dir.iterdir() if f.is_dir()]
        for scenario in scenarios:
            log_dir = scenario / "outdir_train_configuration" \
                / (scenario.name + "_scenario")
            warn_files = glob.glob(str(log_dir) + "/log-warn*")
            # Only warning logs that actually contain content are reported
            non_empty = [log_file for log_file in warn_files
                         if Path(log_file).stat().st_size > 0]
            if non_empty:
                print(f"Scenario {scenario.name} has {len(non_empty)} warning(s), see "
                      "the following log file(s) for more information:")
                for log_file in non_empty:
                    print(f"\t-{log_file}")
            else:
                print(f"Scenario {scenario.name} has no warnings.")
class SMAC2Scenario(ConfigurationScenario):
    """Class to handle SMAC2 configuration scenarios."""

    def __init__(self: SMAC2Scenario, solver: Solver,
                 instance_set: InstanceSet,
                 sparkle_objectives: list[SparkleObjective],
                 parent_directory: Path,
                 number_of_runs: int = None,
                 solver_calls: int = None,
                 max_iterations: int = None,
                 cpu_time: int = None,
                 wallclock_time: int = None,
                 cutoff_time: int = None,
                 target_cutoff_length: str = None,
                 use_cpu_time_in_tunertime: bool = None,
                 feature_data_df: pd.DataFrame = None)\
            -> None:
        """Initialize scenario paths and names.

        Args:
            solver: Solver that should be configured.
            instance_set: Instances object for the scenario.
            sparkle_objectives: SparkleObjectives used for each run of the configuration.
                Will be simplified to the first objective.
            parent_directory: Directory in which the scenario should be created.
            number_of_runs: The number of configurator runs to perform
                for configuring the solver.
            solver_calls: The number of times the solver is called for each
                configuration run
            max_iterations: The maximum number of iterations allowed for each
                configuration run. [iteration-limit, numIterations, numberOfIterations]
            cpu_time: The time budget allocated for each configuration run. (cpu)
            wallclock_time: The time budget allocated for each configuration run.
                (wallclock)
            cutoff_time: The maximum time allowed for each individual run during
                configuration.
            target_cutoff_length: A domain specific measure of when the algorithm
                should consider itself done.
            use_cpu_time_in_tunertime: Whether to calculate SMAC2's own used time for
                budget deduction. Defaults in SMAC2 to True.
            feature_data_df: If features are used, this contains the feature data.
                Defaults to None.
        """
        super().__init__(solver, instance_set, sparkle_objectives, parent_directory)
        self.solver = solver
        self.instance_set = instance_set
        self.name = f"{self.solver.name}_{self.instance_set.name}"

        if sparkle_objectives is not None:
            if len(sparkle_objectives) > 1:
                print("WARNING: SMAC2 does not have multi objective support. "
                      "Only the first objective will be used.")
            self.sparkle_objective = sparkle_objectives[0]
        else:
            self.sparkle_objective = None

        self.number_of_runs = number_of_runs
        self.solver_calls = solver_calls
        self.cpu_time = cpu_time
        self.wallclock_time = wallclock_time
        self.cutoff_time = cutoff_time
        self.cutoff_length = target_cutoff_length
        self.max_iterations = max_iterations
        self.use_cpu_time_in_tunertime = use_cpu_time_in_tunertime
        self.feature_data = feature_data_df

        # Scenario Paths
        self.instance_file_path = self.directory / f"{self.instance_set.name}.txt"
        self.tmp = self.directory / "tmp"
        self.validation = self.directory / "validation"
        self.results_directory = self.directory / "results"

        # SMAC2 Specific
        self.outdir_train = self.directory / "outdir_train_configuration"

    def create_scenario(self: SMAC2Scenario) -> None:
        """Create scenario with solver and instances in the scenario directory.

        This prepares all the necessary subdirectories related to configuration.
        Any existing scenario directory is removed first.
        """
        # Prepare scenario directory
        shutil.rmtree(self.directory, ignore_errors=True)
        self.directory.mkdir(parents=True)
        # Create empty directories as needed
        self.outdir_train.mkdir()
        self.tmp.mkdir()
        self.results_directory.mkdir(parents=True)  # Prepare results directory

        self._prepare_instances()

        if self.feature_data is not None:
            self._create_feature_file()

        self.create_scenario_file()

    def create_scenario_file(self: SMAC2Scenario) -> Path:
        """Create a file with the configuration scenario.

        Writes supplementary information to the target algorithm (algo =) as:
        algo = {configurator_target} {solver_directory} {sparkle_objective}

        Returns:
            Path to the written scenario file.
        """
        with self.scenario_file_path.open("w") as file:
            file.write(f"algo = {SMAC2.configurator_target.absolute()} "
                       f"{self.solver.directory.absolute()} {self.sparkle_objective} \n"
                       f"execdir = {self.tmp.absolute()}/\n"
                       f"deterministic = {1 if self.solver.deterministic else 0}\n"
                       f"run_obj = {self._get_performance_measure()}\n"
                       f"cutoffTime = {self.cutoff_time}\n"
                       f"cutoff_length = {self.cutoff_length}\n"
                       f"paramfile = {self.solver.get_pcs_file()}\n"
                       f"outdir = {self.outdir_train.absolute()}\n"
                       f"instance_file = {self.instance_file_path.absolute()}\n"
                       f"test_instance_file = {self.instance_file_path.absolute()}\n")
            # Optional settings are only written when explicitly set
            if self.max_iterations is not None:
                file.write(f"iteration-limit = {self.max_iterations}\n")
            if self.wallclock_time is not None:
                file.write(f"wallclock-limit = {self.wallclock_time}\n")
            if self.cpu_time is not None:
                file.write(f"cputime-limit = {self.cpu_time}\n")
            if self.solver_calls is not None:
                file.write(f"runcount-limit = {self.solver_calls}\n")
            if self.feature_data is not None:
                file.write(f"feature_file = {self.feature_file_path}\n")
            if self.use_cpu_time_in_tunertime is not None:
                file.write("use-cpu-time-in-tunertime = "
                           f"{self.use_cpu_time_in_tunertime}\n")
            # We don't let SMAC do the validation
            file.write("validation = false" + "\n")
        return self.scenario_file_path

    def _prepare_instances(self: SMAC2Scenario) -> None:
        """Create instance list file without instance specifics."""
        self.instance_file_path.parent.mkdir(exist_ok=True, parents=True)
        with self.instance_file_path.open("w+") as file:
            for instance_path in self.instance_set._instance_paths:
                file.write(f"{instance_path.absolute()}\n")

    def _get_performance_measure(self: SMAC2Scenario) -> str:
        """Retrieve the performance measure of the SparkleObjective.

        Returns:
            "RUNTIME" if the objective's time attribute is truthy,
            "QUALITY" otherwise.
        """
        if self.sparkle_objective.time:
            return "RUNTIME"
        return "QUALITY"

    def _create_feature_file(self: SMAC2Scenario) -> None:
        """Create CSV file from feature data in the scenario directory."""
        self.feature_file_path = Path(self.directory
                                      / f"{self.instance_set.name}_features.csv")
        # feature_file_path already contains the scenario directory; joining it
        # onto the directory again would duplicate a relative directory prefix
        self.feature_data.to_csv(self.feature_file_path,
                                 index_label="INSTANCE_NAME")

    def _clean_up_scenario_dirs(self: SMAC2Scenario,
                                configurator_path: Path,) -> list[Path]:
        """Collect directories to clean up after configuration scenario is done.

        Args:
            configurator_path: Directory containing the "scenarios" directory.

        Returns:
            list[Path]: One directory per configurator run, named by run index.
        """
        configurator_solver_path = configurator_path / "scenarios"\
            / f"{self.solver.name}_{self.instance_set.name}"
        return [configurator_solver_path / str(index)
                for index in range(self.number_of_runs)]

    def serialize_scenario(self: SMAC2Scenario) -> dict:
        """Transform ConfigurationScenario to dictionary format."""
        return {
            "number_of_runs": self.number_of_runs,
            "solver_calls": self.solver_calls,
            "cpu_time": self.cpu_time,
            "wallclock_time": self.wallclock_time,
            "cutoff_time": self.cutoff_time,
            "cutoff_length": self.cutoff_length,
            "max_iterations": self.max_iterations,
            "sparkle_objective": self.sparkle_objective.name,
            "feature_data": self.feature_data,
            "use_cpu_time_in_tunertime": self.use_cpu_time_in_tunertime
        }

    @staticmethod
    def from_file(scenario_file: Path) -> SMAC2Scenario:
        """Reads scenario file and initialises SMAC2Scenario.

        The keys read here match those written by create_scenario_file. The
        number of runs is taken from the number of files in the "results"
        directory next to the scenario file.

        Args:
            scenario_file: Path to a scenario file with "key = value" lines.

        Returns:
            The SMAC2Scenario described by the file.
        """
        config = {}
        with scenario_file.open() as file:
            for line in file:
                key, separator, value = line.strip().partition(" = ")
                # Empty lines and lines without a "key = value" pair are skipped
                if separator:
                    config[key] = value

        def optional_int(*keys: str) -> int | None:
            """Return the first present key as int, or None if none is present."""
            for key in keys:
                if key in config:
                    return int(config[key])
            return None

        # Collect relevant settings
        # "cputime-limit" is what create_scenario_file writes; "cpu_time" is
        # kept as fallback for the key that was previously looked up here
        cpu_time = optional_int("cputime-limit", "cpu_time")
        wallclock_limit = optional_int("wallclock-limit")
        solver_calls = optional_int("runcount-limit")
        max_iterations = optional_int("iteration-limit")
        # Same for the tunertime flag: written key first, previous key second
        use_cpu_time_in_tunertime = None
        for key in ("use-cpu-time-in-tunertime", "use-cputime-in-tunertime"):
            if key in config:
                # The flag is stored as text, convert it back to a boolean
                use_cpu_time_in_tunertime =\
                    config[key].strip().lower() in ("true", "1")
                break

        _, solver_path, objective_str = config["algo"].split(" ")
        objective = SparkleObjective(objective_str)
        solver = Solver(Path(solver_path.strip()))
        # Extract the instance set from the instance file
        instance_file_path = Path(config["instance_file"])
        with instance_file_path.open() as instance_file:
            first_instance = instance_file.readline().strip()
        instance_set_path = Path(first_instance).parent
        instance_set = Instance_Set(Path(instance_set_path))
        results_folder = scenario_file.parent / "results"
        state_run_dirs = [p for p in results_folder.iterdir() if p.is_file()]
        number_of_runs = len(state_run_dirs)
        return SMAC2Scenario(solver,
                             instance_set,
                             [objective],
                             instance_file_path.parent.parent,
                             number_of_runs,
                             solver_calls,
                             max_iterations,
                             cpu_time,
                             wallclock_limit,
                             int(config["cutoffTime"]),
                             config["cutoff_length"],
                             use_cpu_time_in_tunertime)