Coverage for sparkle/configurator/implementations/smac2.py: 72% (188 statements)
1"""Configurator classes to implement SMAC2 in Sparkle."""
2from __future__ import annotations
3from pathlib import Path
4import glob
5import shutil
6import math
8import pandas as pd
10from runrunner import Runner, Run
12from sparkle.configurator.configurator import Configurator, ConfigurationScenario
13from sparkle.solver import Solver
14from sparkle.structures import PerformanceDataFrame, FeatureDataFrame
15from sparkle.instance import InstanceSet, Instance_Set
16from sparkle.types import SparkleObjective, resolve_objective


class SMAC2(Configurator):
    """Class for SMAC2 (Java) configurator."""
    configurator_path = Path(__file__).parent.parent.parent.resolve() /\
        "Components/smac2-v2.10.03-master-778"
    configurator_executable = configurator_path / "smac"
    configurator_target = configurator_path / "smac2_target_algorithm.py"

    version = "2.10.03"
    full_name = "Sequential Model-based Algorithm Configuration"

    def __init__(self: SMAC2,
                 base_dir: Path,
                 output_path: Path) -> None:
        """Initialise the SMAC2 configurator, Java SMAC v2.10.03.

        Args:
            base_dir: The path in which the configurator will be executed.
            output_path: The path where the output will be placed.
        """
        output_path = output_path / SMAC2.__name__
        output_path.mkdir(parents=True, exist_ok=True)
        super().__init__(
            output_path=output_path,
            base_dir=base_dir,
            tmp_path=output_path / "tmp",
            multi_objective_support=False)

    @property
    def name(self: SMAC2) -> str:
        """Returns the name of the configurator."""
        return SMAC2.__name__

    @staticmethod
    def scenario_class() -> ConfigurationScenario:
        """Returns the SMAC2 scenario class."""
        return SMAC2Scenario

    def configure(self: Configurator,
                  scenario: ConfigurationScenario,
                  data_target: PerformanceDataFrame,
                  validate_after: bool = True,
                  sbatch_options: list[str] = [],
                  num_parallel_jobs: int = None,
                  base_dir: Path = None,
                  run_on: Runner = Runner.SLURM) -> list[Run]:
        """Start configuration job.

        Args:
            scenario: ConfigurationScenario object
            data_target: PerformanceDataFrame in which the found configurations
                will be stored.
            validate_after: Whether the configurations should be validated on the
                train set afterwards.
            sbatch_options: List of Slurm batch options to use
            num_parallel_jobs: The maximum number of jobs to run in parallel.
            base_dir: The path where the sbatch scripts will be created for Slurm.
            run_on: On which platform to run the jobs. Default: Slurm.

        Returns:
            A list of RunRunner Run objects.
        """
        if shutil.which("java") is None:
            raise RuntimeError(
                "SMAC2 requires Java 1.8.0_402, but Java is not installed. "
                "Please ensure Java is installed and try again."
            )
        scenario.create_scenario()
        # We set the seed over the last n run ids in the dataframe
        seeds = data_target.run_ids[data_target.num_runs - scenario.number_of_runs:]
        output = [f"{(scenario.results_directory).absolute()}/"
                  f"{scenario.name}_seed_{seed}_smac.txt"
                  for seed in seeds]
        cmds = [f"python3 {Configurator.configurator_cli_path.absolute()} "
                f"{SMAC2.__name__} {output_file} {data_target.csv_filepath} "
                f"{scenario.scenario_file_path} {seed} "
                f"{SMAC2.configurator_executable.absolute()} "
                f"--scenario-file {scenario.scenario_file_path} "
                f"--seed {seed} "
                for output_file, seed in zip(output, seeds)]
        if num_parallel_jobs is not None:
            num_parallel_jobs = max(num_parallel_jobs, scenario.number_of_runs)
        return super().configure(
            configuration_commands=cmds,
            data_target=data_target,
            output=output,
            num_parallel_jobs=num_parallel_jobs,
            scenario=scenario,
            validation_ids=seeds if validate_after else None,
            sbatch_options=sbatch_options,
            base_dir=base_dir,
            run_on=run_on
        )
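
    # Illustrative sketch of one generated configuration command (hypothetical
    # placeholder paths; the actual values are filled in from the scenario and
    # data_target above):
    #   python3 <configurator_cli_path> SMAC2 <results_dir>/<scenario>_seed_2_smac.txt \
    #       <performance_data.csv> <scenario_file> 2 \
    #       <configurator_path>/smac --scenario-file <scenario_file> --seed 2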

    @staticmethod
    def organise_output(output_source: Path,
                        output_target: Path,
                        scenario: SMAC2Scenario,
                        run_id: int) -> None | dict:
        """Retrieves the configuration from the SMAC2 output file and places it in the output."""
        from filelock import FileLock
        call_key = SMAC2.configurator_target.name
        # The last line describing a call holds the best found configuration
        for line in reversed(output_source.open("r").readlines()):
            if call_key in line:
                call_str = line.split(call_key, maxsplit=1)[1].strip()
                # The configuration appears after the first 7 arguments
                configuration = call_str.split(" ", 8)[-1]
                break
        configuration = Solver.config_str_to_dict(configuration)
        if output_target is None or not output_target.exists():
            return configuration
        time_stamp = scenario.scenario_file_path.stat().st_mtime
        configuration["configuration_id"] =\
            f"{SMAC2.__name__}_{time_stamp}_{run_id}"
        instance_names = scenario.instance_set.instance_names
        lock = FileLock(f"{output_target}.lock")
        with lock.acquire(timeout=60):
            performance_data = PerformanceDataFrame(output_target)
            # Resolve absolute path to Solver column
            solver = [s for s in performance_data.solvers
                      if Path(s).name == scenario.solver.name][0]
            # The instance paths in the instance set are absolute
            instances = [instance for instance in performance_data.instances
                         if Path(instance).name in instance_names]
            # We don't set the seed in the dataframe, as that should be part
            # of the configuration
            performance_data.set_value(
                value=[str(configuration)],
                solver=solver,
                instance=instances,
                objective=None,
                run=run_id,
                solver_fields=[PerformanceDataFrame.column_configuration]
            )
            performance_data.save_csv()
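
    # Hedged sketch of the log line that organise_output parses, assuming SMAC2's
    # usual target-algorithm call convention (placeholder values only):
    #   .../smac2_target_algorithm.py <solver_dir> <tmp_dir> <objective> \
    #       <instance> <instance_specifics> <cutoff> <runlength> <seed> \
    #       -param1 'value1' -param2 'value2' ...
    # Only the trailing parameter string is kept and Solver.config_str_to_dict
    # converts it into a dictionary such as {"param1": "value1", "param2": "value2"}.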

    @staticmethod
    def get_smac_run_obj(objective: SparkleObjective) -> str:
        """Return the SMAC run objective for the given SparkleObjective.

        Args:
            objective: The objective to convert.

        Returns:
            "RUNTIME" if the objective is time-based, "QUALITY" otherwise.
        """
        if objective.time:
            return "RUNTIME"
        return "QUALITY"

    def get_status_from_logs(self: SMAC2) -> None:
        """Method to scan the log files of the configurator for warnings."""
        base_dir = self.output_path / "scenarios"
        if not base_dir.exists():
            return
        print(f"Checking the log files of configurator {type(self).__name__} for "
              "warnings...")
        scenarios = [f for f in base_dir.iterdir() if f.is_dir()]
        for scenario in scenarios:
            log_dir = scenario / "outdir_train_configuration" \
                / (scenario.name + "_scenario")
            warn_files = glob.glob(str(log_dir) + "/log-warn*")
            non_empty = [log_file for log_file in warn_files
                         if Path(log_file).stat().st_size > 0]
            if len(non_empty) > 0:
                print(f"Scenario {scenario.name} has {len(non_empty)} warning(s), see "
                      "the following log file(s) for more information:")
                for log_file in non_empty:
                    print(f"\t-{log_file}")
            else:
                print(f"Scenario {scenario.name} has no warnings.")


class SMAC2Scenario(ConfigurationScenario):
    """Class to handle SMAC2 configuration scenarios."""

    def __init__(self: SMAC2Scenario,
                 solver: Solver,
                 instance_set: InstanceSet,
                 sparkle_objectives: list[SparkleObjective],
                 parent_directory: Path,
                 number_of_runs: int = None,
                 solver_calls: int = None,
                 max_iterations: int = None,
                 cpu_time: int = None,
                 wallclock_time: int = None,
                 cutoff_time: int = None,
                 target_cutoff_length: str = None,
                 cli_cores: int = None,
                 use_cpu_time_in_tunertime: bool = None,
                 feature_data: FeatureDataFrame | Path = None)\
            -> None:
        """Initialize scenario paths and names.

        Args:
            solver: Solver that should be configured.
            instance_set: Instances object for the scenario.
            sparkle_objectives: SparkleObjectives used for each run of the
                configuration. Will be simplified to the first objective.
            parent_directory: Directory in which the scenario should be created.
            number_of_runs: The number of configurator runs to perform
                for configuring the solver.
            solver_calls: The number of times the solver is called for each
                configuration run.
            max_iterations: The maximum number of iterations allowed for each
                configuration run. [iteration-limit, numIterations, numberOfIterations]
            cpu_time: The CPU time budget allocated for each configuration run.
            wallclock_time: The wallclock time budget allocated for each
                configuration run.
            cutoff_time: The maximum time allowed for each individual run during
                configuration.
            target_cutoff_length: A domain-specific measure of when the algorithm
                should consider itself done.
            cli_cores: The number of cores to use to execute runs.
                Defaults in SMAC2 to 1.
            use_cpu_time_in_tunertime: Whether to count SMAC2's own used time
                against the budget. Defaults in SMAC2 to True.
            feature_data: If features are used, this contains the feature data.
                If it is a FeatureDataFrame, the values will be converted to SMAC2
                format. If it is a Path, the path will be passed to SMAC2.
                Defaults to None.
        """
        super().__init__(solver, instance_set, sparkle_objectives, parent_directory)
        self.solver = solver
        self.instance_set = instance_set
        self.name = f"{self.solver.name}_{self.instance_set.name}"

        if sparkle_objectives is not None:
            self.sparkle_objective = sparkle_objectives[0]
            if len(sparkle_objectives) > 1:
                print("WARNING: SMAC2 does not have multi objective support. Only the "
                      f"first objective ({self.sparkle_objective}) will be optimised.")
        else:
            self.sparkle_objective = None

        self.number_of_runs = number_of_runs
        self.solver_calls = solver_calls
        self.cpu_time = cpu_time
        self.wallclock_time = wallclock_time
        self.cutoff_time = cutoff_time
        self.cutoff_length = target_cutoff_length
        self.max_iterations = max_iterations
        self.cli_cores = cli_cores
        self.use_cpu_time_in_tunertime = use_cpu_time_in_tunertime

        self.feature_data = feature_data
        self.feature_file_path = None
        if self.feature_data:
            if isinstance(self.feature_data, FeatureDataFrame):
                # Convert feature data to SMAC2 format
                data_dict = {}
                for instance in self.instance_set.instance_paths:
                    data_dict[str(instance)] = feature_data.get_instance(str(instance))

                self.feature_data = pd.DataFrame.from_dict(
                    data_dict, orient="index",
                    columns=[f"Feature{index+1}"
                             for index in range(feature_data.num_features)])

                def map_nan(x: str) -> float:
                    """Map non-numeric and NaN values to -512 (pre-defined by SMAC2)."""
                    try:
                        value = float(x)
                    except Exception:
                        return -512.0
                    return -512.0 if math.isnan(value) else value

                self.feature_data = self.feature_data.map(map_nan)
                self.feature_file_path =\
                    self.directory / f"{self.instance_set.name}_features.csv"
            elif isinstance(self.feature_data, Path):  # Read from Path
                self.feature_file_path = feature_data
                self.feature_data = pd.read_csv(self.feature_file_path,
                                                index_col=0)
            else:
                print(f"WARNING: Feature data is of type {type(feature_data)}. "
                      "Expected FeatureDataFrame or Path.")

        # Scenario Paths
        self.instance_file_path = self.directory / f"{self.instance_set.name}.txt"

        # SMAC2 Specific
        self.outdir_train = self.directory / "outdir_train_configuration"

    def create_scenario(self: SMAC2Scenario) -> None:
        """Create scenario with solver and instances in the parent directory.

        This prepares all the necessary subdirectories related to configuration.
        """
        # Prepare scenario directory
        shutil.rmtree(self.directory, ignore_errors=True)
        self.directory.mkdir(parents=True)
        # Create empty directories as needed
        self.outdir_train.mkdir()
        self.tmp.mkdir()
        self.validation.mkdir()
        self.results_directory.mkdir(parents=True)  # Prepare results directory

        self._prepare_instances()

        if self.feature_data is not None:
            self._create_feature_file()

        self.create_scenario_file()
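
    # Rough sketch of the scenario directory after create_scenario(); the names of
    # the tmp/validation/results subdirectories come from the ConfigurationScenario
    # base class, so the exact names below are assumptions:
    #   <scenario_dir>/
    #       outdir_train_configuration/   SMAC2 training output
    #       tmp/                          temporary files
    #       validation/                   validation results
    #       results/                      per-seed configurator results
    #       <instance_set>.txt            instance list file
    #       <instance_set>_features.csv   feature file (only when features are used)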

    def create_scenario_file(self: SMAC2Scenario) -> Path:
        """Create a file with the configuration scenario.

        Writes supplementary information to the target algorithm (algo =) as:
        algo = {configurator_target} {solver_directory} {tmp_directory} {sparkle_objective}
        """
        with self.scenario_file_path.open("w") as file:
            file.write(f"algo = {SMAC2.configurator_target.absolute()} "
                       f"{self.solver.directory} {self.tmp} {self.sparkle_objective} \n"
                       f"deterministic = {1 if self.solver.deterministic else 0}\n"
                       f"run_obj = {self._get_performance_measure()}\n"
                       f"cutoffTime = {self.cutoff_time}\n"
                       f"cutoff_length = {self.cutoff_length}\n"
                       f"paramfile = {self.solver.get_pcs_file()}\n"
                       f"outdir = {self.outdir_train}\n"
                       f"instance_file = {self.instance_file_path}\n"
                       f"test_instance_file = {self.instance_file_path}\n")
            if self.max_iterations is not None:
                file.write(f"iteration-limit = {self.max_iterations}\n")
            if self.wallclock_time is not None:
                file.write(f"wallclock-limit = {self.wallclock_time}\n")
            if self.cpu_time is not None:
                file.write(f"cputime-limit = {self.cpu_time}\n")
            if self.solver_calls is not None:
                file.write(f"runcount-limit = {self.solver_calls}\n")
            if self.cli_cores is not None:
                file.write(f"cli-cores = {self.cli_cores}\n")
            if self.feature_data is not None:
                file.write(f"feature_file = {self.feature_file_path}\n")
            if self.use_cpu_time_in_tunertime is not None:
                file.write("use-cpu-time-in-tunertime = "
                           f"{self.use_cpu_time_in_tunertime}\n")
            # We don't let SMAC do the validation
            file.write("validation = false\n")
        return self.scenario_file_path
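
    # Illustrative example of a generated scenario file. Paths and budget values
    # are hypothetical placeholders; the keys match the writes above:
    #   algo = <configurator_path>/smac2_target_algorithm.py <solver_dir> <tmp_dir> <objective>
    #   deterministic = 0
    #   run_obj = RUNTIME
    #   cutoffTime = 60
    #   cutoff_length = None
    #   paramfile = <solver_dir>/<parameters>.pcs
    #   outdir = <scenario_dir>/outdir_train_configuration
    #   instance_file = <scenario_dir>/<instance_set>.txt
    #   test_instance_file = <scenario_dir>/<instance_set>.txt
    #   wallclock-limit = 600
    #   validation = false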

    def _prepare_instances(self: SMAC2Scenario) -> None:
        """Create instance list file without instance specifics."""
        self.instance_file_path.parent.mkdir(exist_ok=True, parents=True)
        with self.instance_file_path.open("w+") as file:
            for instance_path in self.instance_set._instance_paths:
                file.write(f"{instance_path}\n")

    def _create_feature_file(self: SMAC2Scenario) -> None:
        """Create CSV file from feature data."""
        self.feature_data.to_csv(self.feature_file_path,
                                 index_label="INSTANCE_NAME")
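
    # Illustrative sketch of the resulting feature CSV (hypothetical instance names
    # and values). Column names come from the Feature{index+1} renaming in __init__,
    # and non-numeric or missing values are mapped to -512.0:
    #   INSTANCE_NAME,Feature1,Feature2,Feature3
    #   /abs/path/instance_a.cnf,0.53,12.0,-512.0
    #   /abs/path/instance_b.cnf,0.47,9.0,3.2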

    def _get_performance_measure(self: SMAC2Scenario) -> str:
        """Retrieve the performance measure of the SparkleObjective.

        Returns:
            Performance measure of the sparkle objective
        """
        if self.sparkle_objective.time:
            return "RUNTIME"
        return "QUALITY"

    def serialize_scenario(self: SMAC2Scenario) -> dict:
        """Transform ConfigurationScenario to dictionary format."""
        return {
            "number_of_runs": self.number_of_runs,
            "solver_calls": self.solver_calls,
            "cpu_time": self.cpu_time,
            "wallclock_time": self.wallclock_time,
            "cutoff_time": self.cutoff_time,
            "cutoff_length": self.cutoff_length,
            "max_iterations": self.max_iterations,
            "sparkle_objective": self.sparkle_objective.name,
            "feature_data": self.feature_file_path,
            "use_cpu_time_in_tunertime": self.use_cpu_time_in_tunertime
        }

    @staticmethod
    def from_file(scenario_file: Path) -> SMAC2Scenario:
        """Reads scenario file and initialises SMAC2Scenario."""
        config = {keyvalue[0]: keyvalue[1]
                  for keyvalue in (line.strip().split(" = ", maxsplit=1)
                                   for line in scenario_file.open().readlines()
                                   if line.strip() != "")}

        # Collect relevant settings
        cpu_time = int(config["cputime-limit"]) if "cputime-limit" in config else None
        wallclock_limit = int(config["wallclock-limit"]) if "wallclock-limit" in config \
            else None
        solver_calls = int(config["runcount-limit"]) if "runcount-limit" in config \
            else None
        max_iterations = int(config["iteration-limit"]) if "iteration-limit" in config \
            else None
        use_cpu_time_in_tunertime = config["use-cpu-time-in-tunertime"]\
            if "use-cpu-time-in-tunertime" in config else None
        cli_cores = config["cli-cores"] if "cli-cores" in config else None

        _, solver_path, _, objective_str = config["algo"].split(" ")
        objective = resolve_objective(objective_str)
        solver = Solver(Path(solver_path.strip()))
        # Extract the instance set from the instance file
        instance_file_path = Path(config["instance_file"])
        instance_set_path = Path(instance_file_path.open().readline().strip()).parent
        instance_set = Instance_Set(Path(instance_set_path))
        results_folder = scenario_file.parent / "results"
        result_files = [p for p in results_folder.iterdir() if p.is_file()]
        number_of_runs = len(result_files)
        feature_data_path = None
        if "feature_file" in config:
            feature_data_path = Path(config["feature_file"])
        return SMAC2Scenario(solver,
                             instance_set,
                             [objective],
                             instance_file_path.parent.parent,
                             number_of_runs,
                             solver_calls,
                             max_iterations,
                             cpu_time,
                             wallclock_limit,
                             int(config["cutoffTime"]),
                             config["cutoff_length"],
                             cli_cores,
                             use_cpu_time_in_tunertime,
                             feature_data_path)
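
# Minimal usage sketch (hypothetical paths; constructing Solver, Instance_Set and
# the PerformanceDataFrame normally requires a prepared Sparkle platform directory):
#   solver = Solver(Path("Solvers/MySolver"))
#   instances = Instance_Set(Path("Instances/MyInstances"))
#   objective = resolve_objective("PAR10")
#   scenario = SMAC2Scenario(solver, instances, [objective],
#                            Path("Output/Configuration/SMAC2/scenarios"),
#                            number_of_runs=5, wallclock_time=600, cutoff_time=60)
#   scenario.create_scenario()
#   configurator = SMAC2(base_dir=Path("Tmp"),
#                        output_path=Path("Output/Configuration"))
#   runs = configurator.configure(scenario, data_target=performance_dataframe)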