Coverage for sparkle/configurator/implementations/smac2.py: 73%
203 statements
« prev ^ index » next coverage.py v7.6.1, created at 2024-09-27 09:10 +0000
1#!/usr/bin/env python3
2# -*- coding: UTF-8 -*-
3"""Configurator class to use different configurators like SMAC."""
5from __future__ import annotations
6from typing import Callable
7from pathlib import Path
8import ast
9from statistics import mean
10import operator
11import fcntl
12import glob
13import shutil
15import pandas as pd
17import runrunner as rrr
18from runrunner import Runner, Run
20from sparkle.configurator.configurator import Configurator, ConfigurationScenario
21from sparkle.solver import Solver
22from sparkle.solver.validator import Validator
23from sparkle.instance import InstanceSet
24from sparkle.types import SparkleObjective
class SMAC2(Configurator):
    """Class for SMAC2 (Java) configurator.

    Wraps the bundled SMAC v2.10.03 Java distribution: creates configuration
    scenarios, launches configuration runs through runrunner, optionally
    validates the found configurations, and extracts the best configuration.
    """
    # Root directory of the bundled SMAC v2.10.03 Java distribution.
    configurator_path = Path(__file__).parent.parent.parent.resolve() /\
        "Components/smac-v2.10.03-master-778"
    # Wrapper script that standardises solver I/O for SMAC2 target calls.
    target_algorithm = "smac_target_algorithm.py"

    def __init__(self: SMAC2,
                 objectives: list[SparkleObjective],
                 base_dir: Path,
                 output_path: Path) -> None:
        """Returns the SMAC configurator, Java SMAC V2.10.03.

        Args:
            objectives: The objectives to optimize. Only supports one objective.
            base_dir: The path where the configurator will be executed in.
            output_path: The path where the output will be placed.
        """
        output_path = output_path / SMAC2.__name__
        output_path.mkdir(parents=True, exist_ok=True)
        # NOTE: __init__ must return None; do not `return` super().__init__'s
        # result as the original did.
        super().__init__(
            validator=Validator(out_dir=output_path),
            output_path=output_path,
            executable_path=SMAC2.configurator_path / "smac",
            configurator_target=SMAC2.configurator_path / SMAC2.target_algorithm,
            objectives=objectives,
            base_dir=base_dir,
            tmp_path=output_path / "tmp",
            multi_objective_support=False)

    @property
    def scenario_class(self: Configurator) -> ConfigurationScenario:
        """Returns the SMAC2 scenario class."""
        return SMAC2Scenario

    def configure(self: Configurator,
                  scenario: ConfigurationScenario,
                  validate_after: bool = True,
                  sbatch_options: list[str] = None,
                  num_parallel_jobs: int = None,
                  base_dir: Path = None,
                  run_on: Runner = Runner.SLURM) -> list[Run]:
        """Start configuration job.

        Args:
            scenario: ConfigurationScenario object
            validate_after: Whether the Validator will be called after the configuration
            sbatch_options: List of slurm batch options to use
            num_parallel_jobs: The maximum number of jobs to run parallel.
            base_dir: The path where the sbatch scripts will be created for Slurm.
            run_on: On which platform to run the jobs. Default: Slurm.

        Returns:
            A list of RunRunner Run objects (configuration run, and the
            validation run when validate_after is True).
        """
        # Avoid the shared-mutable-default pitfall of `sbatch_options=[]`.
        sbatch_options = [] if sbatch_options is None else sbatch_options
        self.scenario = scenario
        self.scenario.create_scenario(parent_directory=self.output_path)
        output_csv = self.scenario.validation / "configurations.csv"
        output_csv.parent.mkdir(exist_ok=True, parents=True)
        # One SMAC output file and one CLI command per configuration run (seed).
        output = [f"{(self.scenario.result_directory).absolute()}/"
                  f"{self.scenario.name}_seed_{seed}_smac.txt"
                  for seed in range(self.scenario.number_of_runs)]
        cmds = [f"python3 {Configurator.configurator_cli_path.absolute()} "
                f"{SMAC2.__name__} {output[seed]} {output_csv.absolute()} "
                f"{self.executable_path.absolute()} "
                f"--scenario-file {(self.scenario.scenario_file_path).absolute()} "
                f"--seed {seed} "
                f"--execdir {self.scenario.tmp.absolute()}"
                for seed in range(self.scenario.number_of_runs)]
        parallel_jobs = self.scenario.number_of_runs
        if num_parallel_jobs is not None:
            # BUGFIX: num_parallel_jobs is documented as an upper bound, so
            # cap with min(); the original max() could exceed the requested
            # parallelism whenever number_of_runs > num_parallel_jobs.
            parallel_jobs = min(num_parallel_jobs,
                                self.scenario.number_of_runs)
        configuration_run = rrr.add_to_queue(
            runner=run_on,
            cmd=cmds,
            name="configure_solver",
            base_dir=base_dir,
            output_path=output,
            parallel_jobs=parallel_jobs,
            sbatch_options=sbatch_options,
            srun_options=["-N1", "-n1"])
        runs = [configuration_run]

        if validate_after:
            self.validator.out_dir = output_csv.parent
            self.validator.tmp_out_dir = base_dir
            validate_run = self.validator.validate(
                [scenario.solver] * self.scenario.number_of_runs,
                output_csv.absolute(),
                [scenario.instance_set],
                [self.scenario.sparkle_objective],
                scenario.cutoff_time,
                subdir=Path(),
                dependency=configuration_run,
                sbatch_options=sbatch_options,
                run_on=run_on)
            runs.append(validate_run)

        if run_on == Runner.LOCAL:
            # Local runs are synchronous: block until all jobs finished.
            for run in runs:
                run.wait()
        return runs

    def get_optimal_configuration(
            self: Configurator,
            solver: Solver,
            instance_set: InstanceSet,
            objective: SparkleObjective = None,
            aggregate_config: Callable = mean) -> tuple[float, str]:
        """Returns optimal value and configuration string of solver on instance set.

        Args:
            solver: The solver that was configured.
            instance_set: The instance set the solver was configured on.
            objective: Objective to rank configurations by. Defaults to the
                configurator's first objective.
            aggregate_config: Aggregation over per-instance values per
                configuration. Defaults to the mean.

        Returns:
            Tuple of the optimal aggregated value and the corresponding
            configuration as a commandline argument string.
        """
        if self.scenario is None:
            self.set_scenario_dirs(solver, instance_set)
        results = self.validator.get_validation_results(
            solver,
            instance_set,
            source_dir=self.scenario.validation,
            subdir=self.scenario.validation.relative_to(self.validator.out_dir))
        if objective is None:
            objective = self.objectives[0]
        # First row is the CSV header; resolve column indices from it.
        value_column = results[0].index(objective.name)
        config_column = results[0].index("Configuration")
        # Group the results per configuration and aggregate their values.
        configurations = list(set(row[config_column] for row in results[1:]))
        config_scores = []
        for config in configurations:
            # BUGFIX: compare against the resolved Configuration column
            # instead of the hard-coded index 1.
            values = [float(row[value_column])
                      for row in results[1:] if row[config_column] == config]
            config_scores.append(aggregate_config(values))

        # Lower is better for minimisation objectives, higher otherwise.
        comparison = operator.lt if objective.minimise else operator.gt

        # Find the optimal aggregated value
        min_index = 0
        current_optimal = config_scores[min_index]
        for i, score in enumerate(config_scores):
            if comparison(score, current_optimal):
                min_index, current_optimal = i, score

        # Return the optimal configuration dictionary as commandline args
        config_str = configurations[min_index].strip(" ")
        if config_str.startswith("{"):
            config = ast.literal_eval(config_str)
            config_str = " ".join([f"-{key} '{config[key]}'" for key in config])
        return current_optimal, config_str

    @staticmethod
    def organise_output(output_source: Path, output_target: Path = None) -> None | str:
        """Retrieves configurations from SMAC files and places them in output.

        Args:
            output_source: SMAC run output file to scan.
            output_target: File to append the configuration to. When None,
                the configuration is returned instead.

        Returns:
            The configuration string when output_target is None, else None.
        """
        call_key = SMAC2.target_algorithm
        # Last line describing a call is the best found configuration;
        # use a context manager so the file handle is not leaked.
        with output_source.open("r") as fin:
            lines = fin.readlines()
        for line in reversed(lines):
            if call_key in line:
                call_str = line.split(call_key, maxsplit=1)[1].strip()
                # The Configuration appears after the first 6 arguments
                configuration = call_str.split(" ", 7)[-1]
                if output_target is None:
                    return configuration
                with output_target.open("a") as fout:
                    # Lock the file: multiple runs may append concurrently.
                    fcntl.flock(fout.fileno(), fcntl.LOCK_EX)
                    fout.write(configuration + "\n")
                break

    def set_scenario_dirs(self: Configurator,
                          solver: Solver, instance_set: InstanceSet) -> None:
        """Patching method to allow the rebuilding of configuration scenario."""
        self.scenario = self.scenario_class(solver, instance_set)
        self.scenario._set_paths(self.output_path)

    @staticmethod
    def get_smac_run_obj(objective: SparkleObjective) -> str:
        """Return the SMAC run objective based on the Performance Measure.

        Returns:
            A string that represents the run objective set in the settings.
        """
        if objective.time:
            return "RUNTIME"
        return "QUALITY"

    def get_status_from_logs(self: SMAC2) -> None:
        """Method to scan the log files of the configurator for warnings."""
        base_dir = self.output_path / "scenarios"
        if not base_dir.exists():
            return
        print(f"Checking the log files of configurator {type(self).__name__} for "
              "warnings...")
        scenarios = [f for f in base_dir.iterdir() if f.is_dir()]
        for scenario in scenarios:
            log_dir = scenario / "outdir_train_configuration" \
                / (scenario.name + "_scenario")
            warn_files = glob.glob(str(log_dir) + "/log-warn*")
            # Only report warning logs that actually contain output.
            non_empty = [log_file for log_file in warn_files
                         if Path(log_file).stat().st_size > 0]
            if len(non_empty) > 0:
                print(f"Scenario {scenario.name} has {len(non_empty)} warning(s), see "
                      "the following log file(s) for more information:")
                for log_file in non_empty:
                    print(f"\t-{log_file}")
            else:
                print(f"Scenario {scenario.name} has no warnings.")
class SMAC2Scenario(ConfigurationScenario):
    """Class to handle SMAC2 configuration scenarios."""

    def __init__(self: ConfigurationScenario, solver: Solver,
                 instance_set: InstanceSet, number_of_runs: int = None,
                 solver_calls: int = None, cpu_time: int = None,
                 wallclock_time: int = None, cutoff_time: int = None,
                 cutoff_length: int = None,
                 sparkle_objectives: list[SparkleObjective] = None,
                 use_features: bool = None, configurator_target: Path = None,
                 feature_data_df: pd.DataFrame = None)\
            -> None:
        """Initialize scenario paths and names.

        Args:
            solver: Solver that should be configured.
            instance_set: Instances object for the scenario.
            number_of_runs: The number of configurator runs to perform
                for configuring the solver.
            solver_calls: The number of times the solver is called for each
                configuration run
            cpu_time: The time budget allocated for each configuration run. (cpu)
            wallclock_time: The time budget allocated for each configuration run.
                (wallclock)
            cutoff_time: The maximum time allowed for each individual run during
                configuration.
            cutoff_length: The maximum number of iterations allowed for each
                individual run during configuration.
            sparkle_objectives: SparkleObjectives used for each run of the configuration.
                Will be simplified to the first objective.
            use_features: Boolean indicating if features should be used.
            configurator_target: The target Python script to be called.
                This script standardises Configurator I/O for solver wrappers.
            feature_data_df: If features are used, this contains the feature data.
                Defaults to None.
        """
        super().__init__(solver, instance_set, sparkle_objectives)
        self.solver = solver
        self.instance_set = instance_set
        self.name = f"{self.solver.name}_{self.instance_set.name}"
        # SMAC2 only supports a single objective; keep the first one.
        self.sparkle_objective = sparkle_objectives[0] if sparkle_objectives else None

        self.number_of_runs = number_of_runs
        self.solver_calls = solver_calls
        self.cpu_time = cpu_time
        self.wallclock_time = wallclock_time
        self.cutoff_time = cutoff_time
        self.cutoff_length = cutoff_length
        self.use_features = use_features
        self.configurator_target = configurator_target
        self.feature_data = feature_data_df

        # Placeholder paths; the real paths are set by _set_paths().
        self.parent_directory = Path()
        self.directory = Path()
        self.result_directory = Path()
        self.scenario_file_path = Path()
        self.feature_file_path = Path()
        self.instance_file_path = Path()

    def create_scenario(self: ConfigurationScenario, parent_directory: Path) -> None:
        """Create scenario with solver and instances in the parent directory.

        This prepares all the necessary subdirectories related to configuration.

        Args:
            parent_directory: Directory in which the scenario should be created.
        """
        self._set_paths(parent_directory)
        self._prepare_scenario_directory()
        self._prepare_result_directory()
        self._prepare_instances()

        if self.use_features:
            self._create_feature_file()

        self._create_scenario_file()

    def _set_paths(self: ConfigurationScenario, parent_directory: Path) -> None:
        """Set the paths for the scenario based on the specified parent directory."""
        self.parent_directory = parent_directory
        self.directory = self.parent_directory / "scenarios" / self.name
        self.result_directory = self.directory / "results"
        self.instance_file_path = self.directory / f"{self.instance_set.name}.txt"
        self.outdir_train = self.directory / "outdir_train_configuration"
        self.tmp = self.directory / "tmp"
        self.validation = self.directory / "validation"

    def _prepare_scenario_directory(self: ConfigurationScenario) -> None:
        """Delete old scenario dir, recreate it, create empty dirs inside."""
        shutil.rmtree(self.directory, ignore_errors=True)
        self.directory.mkdir(parents=True)

        # Create empty directories as needed
        self.outdir_train.mkdir()
        self.tmp.mkdir()

    def _prepare_result_directory(self: ConfigurationScenario) -> None:
        """Delete possible files in result directory."""
        shutil.rmtree(self.result_directory, ignore_errors=True)
        self.result_directory.mkdir(parents=True)

    def _create_scenario_file(self: ConfigurationScenario) -> None:
        """Create a file with the configuration scenario.

        Writes supplementary information to the target algorithm (algo =) as:
        algo = {configurator_target} {solver_directory} {sparkle_objective}
        """
        self.scenario_file_path = self.directory / f"{self.name}_scenario.txt"
        with self.scenario_file_path.open("w") as file:
            file.write(f"algo = {self.configurator_target.absolute()} "
                       f"{self.solver.directory.absolute()} {self.sparkle_objective} \n"
                       f"execdir = {self.tmp.absolute()}/\n"
                       f"deterministic = {1 if self.solver.deterministic else 0}\n"
                       f"run_obj = {self._get_performance_measure()}\n"
                       f"cutoffTime = {self.cutoff_time}\n"
                       f"cutoff_length = {self.cutoff_length}\n"
                       f"paramfile = {self.solver.get_pcs_file()}\n"
                       f"outdir = {self.outdir_train.absolute()}\n"
                       f"instance_file = {self.instance_file_path.absolute()}\n"
                       f"test_instance_file = {self.instance_file_path.absolute()}\n")
            if self.use_features:
                file.write(f"feature_file = {self.feature_file_path}\n")
            if self.wallclock_time is not None:
                file.write(f"wallclock-limit = {self.wallclock_time}\n")
            if self.cpu_time is not None:
                file.write(f"cputime-limit = {self.cpu_time}\n")
            if self.solver_calls is not None:
                file.write(f"runcount-limit = {self.solver_calls}\n")
            # We don't let SMAC do the validation
            file.write("validation = false" + "\n")

    def _prepare_instances(self: ConfigurationScenario) -> None:
        """Create instance list file without instance specifics."""
        self.instance_file_path.parent.mkdir(exist_ok=True, parents=True)
        with self.instance_file_path.open("w+") as file:
            for instance_path in self.instance_set._instance_paths:
                file.write(f"{instance_path.absolute()}\n")

    def _get_performance_measure(self: ConfigurationScenario) -> str:
        """Retrieve the performance measure of the SparkleObjective.

        Returns:
            Performance measure of the sparkle objective
        """
        if self.sparkle_objective.time:
            return "RUNTIME"
        return "QUALITY"

    def _create_feature_file(self: ConfigurationScenario) -> None:
        """Create CSV file from feature data."""
        self.feature_file_path = Path(self.directory
                                      / f"{self.instance_set.name}_features.csv")
        # BUGFIX: feature_file_path already includes self.directory; prefixing
        # it with the directory again duplicated the path for relative dirs.
        self.feature_data.to_csv(self.feature_file_path,
                                 index_label="INSTANCE_NAME")

    def _clean_up_scenario_dirs(self: ConfigurationScenario,
                                configurator_path: Path,) -> list[Path]:
        """Yield directories to clean up after configuration scenario is done.

        Returns:
            list[str]: Full paths to directories that can be removed
        """
        result = []
        configurator_solver_path = configurator_path / "scenarios"\
            / f"{self.solver.name}_{self.instance_set.name}"
        # One numbered subdirectory per configuration run.
        for index in range(self.number_of_runs):
            run_dir = configurator_solver_path / str(index)
            result.append(run_dir)
        return result

    @staticmethod
    def from_file(scenario_file: Path, solver: Solver, instance_set: InstanceSet,
                  ) -> ConfigurationScenario:
        """Reads scenario file and initalises ConfigurationScenario."""
        config = {}
        with scenario_file.open() as file:
            for line in file:
                key, value = line.strip().split(" = ")
                config[key] = value

        # Collect relevant settings
        # BUGFIX: _create_scenario_file writes the CPU budget under the key
        # "cputime-limit"; reading "cpu_time" always yielded None.
        cpu_time = int(config["cputime-limit"]) if "cputime-limit" in config \
            else None
        wallclock_limit = int(config["wallclock-limit"]) if "wallclock-limit" in config \
            else None
        solver_calls = int(config["runcount-limit"]) if "runcount-limit" in config \
            else None
        use_features = bool(config["feature_file"]) if "feature_file" in config \
            else None

        # The objective is the last token of the "algo =" call string.
        objective_str = config["algo"].split(" ")[-1]
        objective = SparkleObjective(objective_str)
        # Each configuration run leaves one result file behind.
        results_folder = scenario_file.parent / "results"
        state_run_dirs = [p for p in results_folder.iterdir() if p.is_file()]
        number_of_runs = len(state_run_dirs)
        return SMAC2Scenario(solver,
                             instance_set,
                             number_of_runs,
                             solver_calls,
                             cpu_time,
                             wallclock_limit,
                             int(config["cutoffTime"]),
                             config["cutoff_length"],
                             [objective],
                             use_features)