Coverage for sparkle/configurator/implementations/smac2.py: 72% (189 statements)
1"""Configurator classes to implement SMAC2 in Sparkle."""
2from __future__ import annotations
3from pathlib import Path
4import glob
5import shutil
6import math
8import pandas as pd
10from runrunner import Runner, Run
12from sparkle.tools.parameters import PCSConvention
13from sparkle.configurator.configurator import Configurator, ConfigurationScenario
14from sparkle.solver import Solver
15from sparkle.structures import PerformanceDataFrame, FeatureDataFrame
16from sparkle.instance import InstanceSet, Instance_Set
17from sparkle.types import SparkleObjective, resolve_objective


class SMAC2(Configurator):
    """Class for SMAC2 (Java) configurator."""
    configurator_path = Path(__file__).parent.parent.parent.resolve() /\
        "Components/smac2-v2.10.03-master-778"
    configurator_executable = configurator_path / "smac"
    configurator_target = configurator_path / "smac2_target_algorithm.py"

    version = "2.10.03"
    full_name = "Sequential Model-based Algorithm Configuration"

    def __init__(self: SMAC2,
                 base_dir: Path,
                 output_path: Path) -> None:
33 """Returns the SMAC2 configurator, Java SMAC V2.10.03.
35 Args:
36 objectives: The objectives to optimize. Only supports one objective.
37 base_dir: The path where the configurator will be executed in.
38 output_path: The path where the output will be placed.
39 """
40 output_path = output_path / SMAC2.__name__
41 output_path.mkdir(parents=True, exist_ok=True)
42 return super().__init__(
43 output_path=output_path,
44 base_dir=base_dir,
45 tmp_path=output_path / "tmp",
46 multi_objective_support=False)

    @property
    def name(self: SMAC2) -> str:
        """Returns the name of the configurator."""
        return SMAC2.__name__

    @staticmethod
    def scenario_class() -> ConfigurationScenario:
        """Returns the SMAC2 scenario class."""
        return SMAC2Scenario

    def configure(self: SMAC2,
                  scenario: SMAC2Scenario,
                  data_target: PerformanceDataFrame,
                  validate_after: bool = True,
                  sbatch_options: list[str] = [],
                  slurm_prepend: str | list[str] | Path = None,
                  num_parallel_jobs: int = None,
                  base_dir: Path = None,
                  run_on: Runner = Runner.SLURM) -> list[Run]:
67 """Start configuration job.
69 Args:
70 scenario: ConfigurationScenario object
71 data_target: PerformanceDataFrame where to store the found configurations
72 validate_after: Whether the configurations should be validated on the
73 train set afterwards.
74 sbatch_options: List of slurm batch options to use
75 num_parallel_jobs: The maximum number of jobs to run parallel.
76 base_dir: The path where the sbatch scripts will be created for Slurm.
77 run_on: On which platform to run the jobs. Default: Slurm.
79 Returns:
80 A RunRunner Run object.
81 """
        if shutil.which("java") is None:
            raise RuntimeError(
                "SMAC2 requires Java 1.8.0_402, but Java is not installed. "
                "Please ensure Java is installed and try again."
            )
        scenario.create_scenario()
        # We set the seed over the last n run ids in the dataframe
        seeds = data_target.run_ids[data_target.num_runs - scenario.number_of_runs:]
        output = [f"{(scenario.results_directory).absolute()}/"
                  f"{scenario.name}_seed_{seed}_smac.txt"
                  for seed in seeds]
        cmds = [f"python3 {Configurator.configurator_cli_path.absolute()} "
                f"{SMAC2.__name__} {output_file} {data_target.csv_filepath} "
                f"{scenario.scenario_file_path} {seed} "
                f"{SMAC2.configurator_executable.absolute()} "
                f"--scenario-file {scenario.scenario_file_path} "
                f"--seed {seed} "
                for output_file, seed in zip(output, seeds)]
        if num_parallel_jobs is not None:
            num_parallel_jobs = max(num_parallel_jobs, len(cmds))
        return super().configure(
            configuration_commands=cmds,
            data_target=data_target,
            output=output,
            slurm_prepend=slurm_prepend,
            num_parallel_jobs=num_parallel_jobs,
            scenario=scenario,
            validation_ids=seeds if validate_after else None,
            sbatch_options=sbatch_options,
            base_dir=base_dir,
            run_on=run_on
        )
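
    # Illustrative sketch of a single generated configuration command (paths are
    # placeholders, not values defined in this module):
    #   python3 <configurator_cli_path> SMAC2 <results_dir>/<scenario>_seed_3_smac.txt \
    #       <performance_data.csv> <scenario_file.txt> 3 <smac_executable> \
    #       --scenario-file <scenario_file.txt> --seed 3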

    @staticmethod
    def organise_output(output_source: Path,
                        output_target: Path,
                        scenario: SMAC2Scenario,
                        run_id: int) -> None | dict:
        """Retrieves the configuration from the SMAC2 file and places it in the output."""
        from filelock import FileLock
        call_key = SMAC2.configurator_target.name
        # The last line describing a call is the best found configuration
        for line in reversed(output_source.open("r").readlines()):
            if call_key in line:
                call_str = line.split(call_key, maxsplit=1)[1].strip()
                # The configuration appears after the first 8 space-separated tokens
                configuration = call_str.split(" ", 8)[-1]
                break
        configuration = Solver.config_str_to_dict(configuration)
        if output_target is None or not output_target.exists():
            return configuration
        time_stamp = scenario.scenario_file_path.stat().st_mtime
        configuration["configuration_id"] =\
            f"{SMAC2.__name__}_{time_stamp}_{run_id}"
        instance_names = scenario.instance_set.instance_names
        lock = FileLock(f"{output_target}.lock")
        with lock.acquire(timeout=60):
            performance_data = PerformanceDataFrame(output_target)
            # Resolve absolute path to Solver column
            solver = [s for s in performance_data.solvers
                      if Path(s).name == scenario.solver.name][0]
            # For some reason the instance paths in the instance set are absolute
            instances = [instance for instance in performance_data.instances
                         if Path(instance).name in instance_names]
            # We don't set the seed in the dataframe, as that should be part of the conf
            performance_data.set_value(
                value=[str(configuration)],
                solver=solver,
                instance=instances,
                objective=None,
                run=run_id,
                solver_fields=[PerformanceDataFrame.column_configuration]
            )
            performance_data.save_csv()
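
    # Illustrative sketch of the target-algorithm call line that gets parsed above
    # (token order is an assumption based on SMAC2's call convention; all values
    # are placeholders):
    #   ... smac2_target_algorithm.py <solver_dir> <tmp_dir> <objective> <instance> \
    #       <instance_specifics> <cutoff_time> <run_length> <seed> -param1 'v1' -param2 'v2'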

    @staticmethod
    def get_smac_run_obj(objective: SparkleObjective) -> str:
        """Return the SMAC2 run objective based on the given objective.

        Returns:
            A string that represents the SMAC2 run objective (RUNTIME or QUALITY).
        """
        if objective.time:
            return "RUNTIME"
        return "QUALITY"

    def get_status_from_logs(self: SMAC2) -> None:
        """Method to scan the log files of the configurator for warnings."""
        base_dir = self.output_path / "scenarios"
        if not base_dir.exists():
            return
        print(f"Checking the log files of configurator {type(self).__name__} for "
              "warnings...")
        scenarios = [f for f in base_dir.iterdir() if f.is_dir()]
        for scenario in scenarios:
            log_dir = scenario / "outdir_train_configuration" \
                / (scenario.name + "_scenario")
            warn_files = glob.glob(str(log_dir) + "/log-warn*")
            non_empty = [log_file for log_file in warn_files
                         if Path(log_file).stat().st_size > 0]
            if len(non_empty) > 0:
                print(f"Scenario {scenario.name} has {len(non_empty)} warning(s), see "
                      "the following log file(s) for more information:")
                for log_file in non_empty:
                    print(f"\t-{log_file}")
            else:
                print(f"Scenario {scenario.name} has no warnings.")


class SMAC2Scenario(ConfigurationScenario):
    """Class to handle SMAC2 configuration scenarios."""
    def __init__(self: SMAC2Scenario,
                 solver: Solver,
                 instance_set: InstanceSet,
                 sparkle_objectives: list[SparkleObjective],
                 parent_directory: Path,
                 number_of_runs: int = None,
                 solver_calls: int = None,
                 max_iterations: int = None,
                 cpu_time: int = None,
                 wallclock_time: int = None,
                 cutoff_time: int = None,
                 target_cutoff_length: str = None,
                 cli_cores: int = None,
                 use_cpu_time_in_tunertime: bool = None,
                 feature_data: FeatureDataFrame | Path = None)\
            -> None:
209 """Initialize scenario paths and names.
211 Args:
212 solver: Solver that should be configured.
213 instance_set: Instances object for the scenario.
214 sparkle_objectives: SparkleObjectives used for each run of the configuration.
215 Will be simplified to the first objective.
216 parent_directory: Directory in which the scenario should be created.
217 number_of_runs: The number of configurator runs to perform
218 for configuring the solver.
219 solver_calls: The number of times the solver is called for each
220 configuration run
221 max_iterations: The maximum number of iterations allowed for each
222 configuration run. [iteration-limit, numIterations, numberOfIterations]
223 cpu_time: The time budget allocated for each configuration run. (cpu)
224 wallclock_time: The time budget allocated for each configuration run.
225 (wallclock)
226 cutoff_time: The maximum time allowed for each individual run during
227 configuration.
228 target_cutoff_length: A domain specific measure of when the algorithm
229 should consider itself done.
230 cli_cores: int
231 The number of cores to use to execute runs. Defaults in SMAC2 to 1.
232 use_cpu_time_in_tunertime: Whether to calculate SMAC2's own used time for
233 budget deduction. Defaults in SMAC2 to True.
234 feature_data: If features are used, this contains the feature data.
235 If it is a FeatureDataFrame, will convert values to SMAC2 format.
236 If it is a Path, will pass the path to SMAC2.
237 Defaults to None.
238 """
        super().__init__(solver, instance_set, sparkle_objectives, parent_directory)
        self.solver = solver
        self.instance_set = instance_set
        self.name = f"{self.solver.name}_{self.instance_set.name}"

        if sparkle_objectives is not None:
            self.sparkle_objective = sparkle_objectives[0]
        else:
            self.sparkle_objective = None

        self.number_of_runs = number_of_runs
        self.solver_calls = solver_calls
        self.cpu_time = cpu_time
        self.wallclock_time = wallclock_time
        self.cutoff_time = cutoff_time
        self.cutoff_length = target_cutoff_length
        self.max_iterations = max_iterations
        self.cli_cores = cli_cores
        self.use_cpu_time_in_tunertime = use_cpu_time_in_tunertime

        self.feature_data = feature_data
        self.feature_file_path = None
        if self.feature_data:
            if isinstance(self.feature_data, FeatureDataFrame):
                # Convert feature data to SMAC2 format
                data_dict = {}
                for instance in self.instance_set.instance_paths:
                    data_dict[str(instance)] = feature_data.get_instance(str(instance))

                self.feature_data = pd.DataFrame.from_dict(
                    data_dict, orient="index",
                    columns=[f"Feature{index+1}"
                             for index in range(feature_data.num_features)])

                def map_nan(x: str) -> float:
                    """Map non-numeric values to -512 (pre-defined by SMAC2)."""
                    try:
                        value = float(x)
                    except Exception:
                        return -512.0
                    return -512.0 if math.isnan(value) else value

                self.feature_data = self.feature_data.map(map_nan)
                self.feature_file_path =\
                    self.directory / f"{self.instance_set.name}_features.csv"
            elif isinstance(self.feature_data, Path):  # Read from Path
                self.feature_file_path = feature_data
                self.feature_data = pd.read_csv(self.feature_file_path,
                                                index_col=0)
            else:
                print(f"WARNING: Feature data is of type {type(feature_data)}. "
                      "Expected FeatureDataFrame or Path.")

        # Scenario Paths
        self.instance_file_path = self.directory / f"{self.instance_set.name}.txt"

        # SMAC2 Specific
        self.outdir_train = self.directory / "outdir_train_configuration"
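
    # Construction sketch (illustrative values; the Solver and InstanceSet objects
    # and the objective name are assumptions, not defined in this module):
    #   scenario = SMAC2Scenario(solver, instance_set, [resolve_objective("PAR10")],
    #                            parent_directory=Path("Output/Configuration/scenarios"),
    #                            number_of_runs=5, solver_calls=100, cutoff_time=60)
    #   scenario.create_scenario()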

    def create_scenario(self: SMAC2Scenario) -> None:
        """Create scenario with solver and instances in the parent directory.

        This prepares all the necessary subdirectories related to configuration.
        """
        # Prepare scenario directory
        shutil.rmtree(self.directory, ignore_errors=True)
        self.directory.mkdir(parents=True)
        # Create empty directories as needed
        self.outdir_train.mkdir()
        self.tmp.mkdir()
        self.validation.mkdir()
        self.results_directory.mkdir(parents=True)  # Prepare results directory

        self._prepare_instances()

        if self.feature_data is not None:
            self._create_feature_file()

        self.create_scenario_file()

    def create_scenario_file(
            self: SMAC2Scenario,
            configurator_target: Path = SMAC2.configurator_target,
            pcs_port: PCSConvention = PCSConvention.SMAC) -> Path:
        """Create a file with the configuration scenario.

        Writes supplementary information to the target algorithm (algo =) as:
        algo = {configurator_target} {solver_directory} {tmp_directory} {sparkle_objective}
        """
        with self.scenario_file_path.open("w") as file:
            file.write(f"algo = {configurator_target.absolute()} "
                       f"{self.solver.directory} {self.tmp} {self.sparkle_objective} \n"
                       f"deterministic = {1 if self.solver.deterministic else 0}\n"
                       f"run_obj = {self._get_performance_measure()}\n"
                       f"cutoffTime = {self.cutoff_time}\n"
                       f"paramfile = {self.solver.get_pcs_file(pcs_port)}\n"
                       f"outdir = {self.outdir_train}\n"
                       f"instance_file = {self.instance_file_path}\n"
                       f"test_instance_file = {self.instance_file_path}\n")
            if self.cutoff_length is not None:
                file.write(f"cutoff_length = {self.cutoff_length}\n")
            if self.max_iterations is not None:
                file.write(f"iteration-limit = {self.max_iterations}\n")
            if self.wallclock_time is not None:
                file.write(f"wallclock-limit = {self.wallclock_time}\n")
            if self.cpu_time is not None:
                file.write(f"cputime-limit = {self.cpu_time}\n")
            if self.solver_calls is not None:
                file.write(f"runcount-limit = {self.solver_calls}\n")
            if self.cli_cores is not None:
353 file.write(f"cli-cores = {self.cli_cores}")
            if self.feature_data is not None:
                file.write(f"feature_file = {self.feature_file_path}\n")
            if self.use_cpu_time_in_tunertime is not None:
                file.write("use-cpu-time-in-tunertime = "
                           f"{self.use_cpu_time_in_tunertime}\n")
            # We don't let SMAC do the validation
            file.write("validation = false\n")
        return self.scenario_file_path
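
    # Illustrative sketch of a resulting scenario file (paths, the objective name
    # and the budget values are placeholders, not outputs of this module):
    #   algo = <path>/smac2_target_algorithm.py <solver_dir> <tmp_dir> PAR10
    #   deterministic = 0
    #   run_obj = RUNTIME
    #   cutoffTime = 60
    #   paramfile = <solver_dir>/parameters.pcs
    #   outdir = <scenario_dir>/outdir_train_configuration
    #   instance_file = <scenario_dir>/<instance_set>.txt
    #   test_instance_file = <scenario_dir>/<instance_set>.txt
    #   wallclock-limit = 600
    #   validation = false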

    def _prepare_instances(self: SMAC2Scenario) -> None:
        """Create instance list file without instance specifics."""
        self.instance_file_path.parent.mkdir(exist_ok=True, parents=True)
        with self.instance_file_path.open("w+") as file:
            for instance_path in self.instance_set._instance_paths:
                file.write(f"{instance_path}\n")

    def _create_feature_file(self: SMAC2Scenario) -> None:
        """Create CSV file from feature data."""
        self.feature_data.to_csv(self.feature_file_path,
                                 index_label="INSTANCE_NAME")
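
    # Illustrative sketch of the resulting feature file (instance paths and feature
    # values are placeholders; non-numeric values are written as -512.0):
    #   INSTANCE_NAME,Feature1,Feature2,Feature3
    #   <instance_dir>/instance_a.cnf,0.0,17.25,-512.0
    #   <instance_dir>/instance_b.cnf,1.0,20.5,3.0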

    def _get_performance_measure(self: SMAC2Scenario) -> str:
        """Retrieve the performance measure of the SparkleObjective.

        Returns:
            Performance measure of the sparkle objective
        """
        if self.sparkle_objective.time:
            return "RUNTIME"
        return "QUALITY"

    def serialize_scenario(self: SMAC2Scenario) -> dict:
        """Transform ConfigurationScenario to dictionary format."""
        return {
            "number_of_runs": self.number_of_runs,
            "solver_calls": self.solver_calls,
            "cpu_time": self.cpu_time,
            "wallclock_time": self.wallclock_time,
            "cutoff_time": self.cutoff_time,
            "cutoff_length": self.cutoff_length,
            "max_iterations": self.max_iterations,
            "sparkle_objective": self.sparkle_objective.name,
            "feature_data": self.feature_file_path,
            "use_cpu_time_in_tunertime": self.use_cpu_time_in_tunertime
        }

    @staticmethod
    def from_file(scenario_file: Path) -> SMAC2Scenario:
        """Reads a scenario file and initialises a SMAC2Scenario."""
        config = {keyvalue[0]: keyvalue[1]
                  for keyvalue in (line.strip().split(" = ", maxsplit=1)
                                   for line in scenario_file.open().readlines()
                                   if line.strip() != "")}

        # Collect relevant settings
        cpu_time = int(config["cputime-limit"]) if "cputime-limit" in config else None
        wallclock_limit = int(config["wallclock-limit"]) if "wallclock-limit" in config \
            else None
        solver_calls = int(config["runcount-limit"]) if "runcount-limit" in config \
            else None
        max_iterations = int(config["iteration-limit"]) if "iteration-limit" in config \
            else None
        use_cpu_time_in_tunertime = config["use-cpu-time-in-tunertime"]\
            if "use-cpu-time-in-tunertime" in config else None
        cli_cores = config["cli-cores"] if "cli-cores" in config else None

        _, solver_path, _, objective_str = config["algo"].split(" ")
        objective = resolve_objective(objective_str)
        solver = Solver(Path(solver_path.strip()))
        # Extract the instance set from the instance file
        instance_file_path = Path(config["instance_file"])
        instance_set_path = Path(instance_file_path.open().readline().strip()).parent
        instance_set = Instance_Set(Path(instance_set_path))
        results_folder = scenario_file.parent / "results"
        run_result_files = [p for p in results_folder.iterdir() if p.is_file()]
        number_of_runs = len(run_result_files)
        feature_data_path = None
        if "feature_file" in config:
            feature_data_path = Path(config["feature_file"])
        return SMAC2Scenario(solver,
                             instance_set,
                             [objective],
                             instance_file_path.parent.parent,
                             number_of_runs,
                             solver_calls,
                             max_iterations,
                             cpu_time,
                             wallclock_limit,
                             int(config["cutoffTime"]),
                             config.get("cutoff_length"),
                             cli_cores,
                             use_cpu_time_in_tunertime,
                             feature_data_path)
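
    # Round-trip sketch (illustrative path; assumes the scenario file and its
    # results directory were previously created by create_scenario()):
    #   scenario = SMAC2Scenario.from_file(Path("Output/.../scenario_file.txt"))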