Coverage for sparkle/configurator/implementations/smac2.py: 73%
206 statements
« prev ^ index » next coverage.py v7.10.7, created at 2025-09-29 10:17 +0000
1"""Configurator classes to implement SMAC2 in Sparkle."""
3from __future__ import annotations
4from pathlib import Path
5import glob
6import shutil
7import math
8import random
10import pandas as pd
12from runrunner import Runner, Run
14from sparkle.tools.parameters import PCSConvention
15from sparkle.configurator.configurator import Configurator, ConfigurationScenario
16from sparkle.solver import Solver
17from sparkle.structures import PerformanceDataFrame, FeatureDataFrame
18from sparkle.instance import InstanceSet, Instance_Set
19from sparkle.types import SparkleObjective, resolve_objective
class SMAC2(Configurator):
    """Class for SMAC2 (Java) configurator."""

    configurator_path = Path(__file__).parent.resolve() / "SMAC2"
    configurator_executable = configurator_path / "smac"
    configurator_target = configurator_path / "smac2_target_algorithm.py"

    full_name = "Sequential Model-based Algorithm Configuration"
    version = "2.10.03"

    def __init__(self: SMAC2) -> None:
        """Initialise the SMAC2 configurator, Java SMAC V2.10.03."""
        # SMAC2 only supports a single objective per configuration run.
        super().__init__(multi_objective_support=False)

    @property
    def name(self: SMAC2) -> str:
        """Returns the name of the configurator."""
        return SMAC2.__name__

    @staticmethod
    def scenario_class() -> ConfigurationScenario:
        """Returns the SMAC2 scenario class."""
        return SMAC2Scenario

    @staticmethod
    def check_requirements(verbose: bool = False) -> bool:
        """Check that SMAC2 is installed.

        Args:
            verbose: If True, emit a warning for each missing requirement.

        Returns:
            True iff both Java and the SMAC2 executable are available.
        """
        import warnings
        if no_java := shutil.which("java") is None:
            if verbose:
                warnings.warn(
                    "SMAC2 requires Java 1.8.0_402, but Java is not installed. "
                    "Please ensure Java is installed."
                )
        if no_smac := not SMAC2.configurator_executable.exists():
            if verbose:
                warnings.warn(
                    "SMAC2 executable not found. Please ensure SMAC2 is installed "
                    f"in the expected Path ({SMAC2.configurator_path})."
                )
        return not (no_java or no_smac)

    @staticmethod
    def download_requirements(
        smac2_zip_url: str = "https://github.com/ADA-research/Sparkle/raw/refs/heads/"
        "development/Resources/Configurators/SMAC2-v2.10.03.zip",
    ) -> None:
        """Download and unpack SMAC2 into the expected installation path."""
        if SMAC2.configurator_executable.exists():
            return  # Already installed
        from urllib.request import urlopen
        import zipfile
        import io

        r = urlopen(smac2_zip_url, timeout=60)
        z = zipfile.ZipFile(io.BytesIO(r.read()))
        z.extractall(SMAC2.configurator_path)
        # Ensure execution rights
        SMAC2.configurator_executable.chmod(0o755)

    def configure(
        self: SMAC2,
        scenario: SMAC2Scenario,
        data_target: PerformanceDataFrame,
        validate_after: bool = True,
        sbatch_options: list[str] = None,
        slurm_prepend: str | list[str] | Path = None,
        num_parallel_jobs: int = None,
        base_dir: Path = None,
        run_on: Runner = Runner.SLURM,
    ) -> list[Run]:
        """Start configuration job.

        Args:
            scenario: ConfigurationScenario object
            data_target: PerformanceDataFrame where to store the found configurations
            validate_after: Whether the configurations should be validated on the
                train set afterwards.
            sbatch_options: List of slurm batch options to use. Defaults to no
                options (None).
            slurm_prepend: Slurm script to prepend to the sbatch.
            num_parallel_jobs: The maximum number of jobs to run parallel.
            base_dir: The path where the sbatch scripts will be created for Slurm.
            run_on: On which platform to run the jobs. Default: Slurm.

        Returns:
            A RunRunner Run object.
        """
        # BUG FIX: avoid a shared mutable default argument; normalise None here
        # so super().configure still receives a list as before.
        if sbatch_options is None:
            sbatch_options = []
        scenario.create_scenario()
        configuration_ids = scenario.configuration_ids
        # The maximum seed size for SMAC2 is 999 999 999
        seeds = [random.randint(0, 10**9 - 1) for _ in range(scenario.number_of_runs)]
        output = [
            f"{(scenario.results_directory).absolute()}/"
            f"{scenario.name}_{config_id}_smac.txt"
            for config_id in configuration_ids
        ]
        cmds = [
            f"python3 {Configurator.configurator_cli_path.absolute()} "
            f"{SMAC2.__name__} {output_file} {data_target.csv_filepath} "
            f"{scenario.scenario_file_path} {configuration_id} "
            f"{SMAC2.configurator_executable.absolute()} "
            f"--scenario-file {scenario.scenario_file_path} "
            f"--seed {seed} "
            for output_file, configuration_id, seed in zip(
                output, configuration_ids, seeds
            )
        ]
        if num_parallel_jobs is not None:
            # NOTE(review): max(...) raises the parallelism to at least len(cmds);
            # confirm min(...) (capping) was not intended here.
            num_parallel_jobs = max(num_parallel_jobs, len(cmds))
        return super().configure(
            configuration_commands=cmds,
            data_target=data_target,
            output=output,
            num_parallel_jobs=num_parallel_jobs,
            scenario=scenario,
            configuration_ids=configuration_ids,
            validate_after=validate_after,
            sbatch_options=sbatch_options,
            slurm_prepend=slurm_prepend,
            base_dir=base_dir,
            run_on=run_on,
        )

    @staticmethod
    def organise_output(
        output_source: Path,
        output_target: Path,
        scenario: SMAC2Scenario,
        configuration_id: str,
    ) -> None | dict:
        """Retrieves configuration from SMAC file and places them in output.

        Args:
            output_source: SMAC2 log file to parse.
            output_target: Where to store the found configuration.
            scenario: The scenario the configuration belongs to.
            configuration_id: Identifier under which to store the configuration.

        Returns:
            The result of Configurator.save_configuration, or None when no
            target-algorithm call could be found in the log.
        """
        call_key = SMAC2.configurator_target.name
        configuration = None
        # Last line describing a call is the best found configuration
        with output_source.open("r") as log_file:
            for line in reversed(log_file.readlines()):
                if call_key in line:
                    call_str = line.split(call_key, maxsplit=1)[1].strip()
                    # The Configuration appears after the first 7 arguments
                    configuration = call_str.split(" ", 8)[-1]
                    break
        # BUG FIX: previously a missing call line caused a NameError; now we
        # return None explicitly (within the declared None | dict contract).
        if configuration is None:
            return None
        configuration = Solver.config_str_to_dict(configuration)
        configuration["configuration_id"] = configuration_id
        return Configurator.save_configuration(
            scenario, configuration_id, configuration, output_target
        )

    @staticmethod
    def get_smac_run_obj(objective: SparkleObjective) -> str:
        """Return the SMAC run objective based on the Performance Measure.

        Returns:
            A string that represents the run objective set in the settings.
        """
        if objective.time:
            return "RUNTIME"
        return "QUALITY"

    def get_status_from_logs(self: SMAC2, base_dir: Path) -> None:
        """Method to scan the log files of the configurator for warnings."""
        if not base_dir.exists():
            return
        print(
            f"Checking the log files of configurator {type(self).__name__} for "
            "warnings..."
        )
        scenarios = [f for f in base_dir.iterdir() if f.is_dir()]
        for scenario in scenarios:
            # SMAC2 writes its warning logs per scenario under this fixed layout
            log_dir = (
                scenario / "outdir_train_configuration" / (scenario.name + "_scenario")
            )
            warn_files = glob.glob(str(log_dir) + "/log-warn*")
            non_empty = [
                log_file for log_file in warn_files if Path(log_file).stat().st_size > 0
            ]
            if len(non_empty) > 0:
                print(
                    f"Scenario {scenario.name} has {len(non_empty)} warning(s), see "
                    "the following log file(s) for more information:"
                )
                for log_file in non_empty:
                    print(f"\t-{log_file}")
            else:
                print(f"Scenario {scenario.name} has no warnings.")
class SMAC2Scenario(ConfigurationScenario):
    """Class to handle SMAC2 configuration scenarios."""

    def __init__(
        self: SMAC2Scenario,
        solver: Solver,
        instance_set: InstanceSet,
        sparkle_objectives: list[SparkleObjective],
        number_of_runs: int,
        parent_directory: Path,
        solver_calls: int = None,
        max_iterations: int = None,
        cpu_time: int = None,
        wallclock_time: int = None,
        solver_cutoff_time: int = None,
        target_cutoff_length: str = None,
        cli_cores: int = None,
        use_cpu_time_in_tunertime: bool = None,
        feature_data: FeatureDataFrame | Path = None,
        timestamp: str = None,
    ) -> None:
        """Initialize scenario paths and names.

        Args:
            solver: Solver that should be configured.
            instance_set: Instances object for the scenario.
            sparkle_objectives: SparkleObjectives used for each run of the configuration.
                Will be simplified to the first objective.
            number_of_runs: The number of configurator runs to perform
                for configuring the solver.
            parent_directory: Directory in which the scenario should be created.
            solver_calls: The number of times the solver is called for each
                configuration run
            max_iterations: The maximum number of iterations allowed for each
                configuration run. [iteration-limit, numIterations, numberOfIterations]
            cpu_time: The time budget allocated for each configuration run. (cpu)
            wallclock_time: The time budget allocated for each configuration run.
                (wallclock)
            solver_cutoff_time: The maximum time allowed for each solver call run during
                configuration.
            target_cutoff_length: A domain specific measure of when the algorithm
                should consider itself done.
            cli_cores: int
                The number of cores to use to execute runs. Defaults in SMAC2 to 1.
            use_cpu_time_in_tunertime: Whether to calculate SMAC2's own used time for
                budget deduction. Defaults in SMAC2 to True.
            feature_data: If features are used, this contains the feature data.
                If it is a FeatureDataFrame, will convert values to SMAC2 format.
                If it is a Path, will pass the path to SMAC2.
                Defaults to None.
            timestamp: An optional timestamp for the directory name.
        """
        super().__init__(
            solver,
            instance_set,
            sparkle_objectives,
            number_of_runs,
            parent_directory,
            timestamp,
        )
        self.solver = solver
        self.instance_set = instance_set
        # SMAC2 supports a single objective; only the first one is used
        self.sparkle_objective = sparkle_objectives[0]
        self.solver_calls = solver_calls
        self.cpu_time = cpu_time
        self.wallclock_time = wallclock_time
        self.solver_cutoff_time = solver_cutoff_time
        self.cutoff_length = target_cutoff_length
        self.max_iterations = max_iterations
        self.cli_cores = cli_cores
        self.use_cpu_time_in_tunertime = use_cpu_time_in_tunertime

        self.feature_data = feature_data
        self._feature_file_path = None
        # BUG FIX: use an explicit None check; DataFrame truthiness raises
        # ValueError, and the rest of this class already tests `is not None`.
        if self.feature_data is not None:
            if isinstance(self.feature_data, FeatureDataFrame):
                # Convert feature data to SMAC2 format
                data_dict = {}
                for instance in self.instance_set.instance_paths:
                    data_dict[str(instance)] = feature_data.get_instance(str(instance))

                self.feature_data = pd.DataFrame.from_dict(
                    data_dict,
                    orient="index",
                    columns=[
                        f"Feature{index + 1}"
                        for index in range(feature_data.num_features)
                    ],
                )

                def map_nan(x: str) -> float:
                    """Map non-numeric values with -512 (Pre-defined by SMAC2)."""
                    # BUG FIX: math.isnan raises TypeError on non-numeric input,
                    # so convert first and treat conversion failure as missing.
                    try:
                        value = float(x)
                    except Exception:
                        return -512.0
                    return -512.0 if math.isnan(value) else value

                self.feature_data = self.feature_data.map(map_nan)
            elif isinstance(self.feature_data, Path):  # Read from Path
                self._feature_file_path = feature_data
                self.feature_data = pd.read_csv(self.feature_file_path, index_col=0)
            else:
                print(
                    f"WARNING: Feature data is of type {type(feature_data)}. "
                    "Expected FeatureDataFrame or Path."
                )

    @property
    def instance_file_path(self: SMAC2Scenario) -> Path:
        """Return the path of the instance file."""
        if self.directory:
            return self.directory / f"{self.instance_set.name}.txt"
        return None

    @property
    def outdir_train(self: SMAC2Scenario) -> Path:
        """Return the path of the train out directory."""
        # SMAC2 Specific directory
        if self.directory:
            return self.directory / "outdir_train_configuration"
        return None

    @property
    def feature_file_path(self: SMAC2Scenario) -> Path:
        """Return the path of the feature file."""
        if self._feature_file_path:
            return self._feature_file_path
        elif self.directory:
            return self.directory / f"{self.instance_set.name}_features.csv"
        else:
            return None

    @property
    def configurator(self: SMAC2Scenario) -> SMAC2:
        """Return the type of configurator the scenario belongs to."""
        return SMAC2

    def create_scenario(self: SMAC2Scenario) -> None:
        """Create scenario with solver and instances in the parent directory.

        This prepares all the necessary subdirectories related to configuration.
        """
        super().create_scenario()
        self.outdir_train.mkdir()
        self._prepare_instances()

        if self.feature_data is not None:
            self._create_feature_file()

        self.create_scenario_file()

    def create_scenario_file(
        self: SMAC2Scenario,
        configurator_target: Path = SMAC2.configurator_target,
        pcs_port: PCSConvention = PCSConvention.SMAC,
    ) -> Path:
        """Create a file with the configuration scenario.

        Writes supplementary information to the target algorithm (algo =) as:
        algo = {configurator_target} {solver_directory} {sparkle_objective}

        Returns:
            The path to the written scenario file.
        """
        with self.scenario_file_path.open("w") as file:
            file.write(
                f"algo = {configurator_target.absolute()} "
                f"{self.solver.directory} {self.tmp} {self.sparkle_objective} \n"
                f"deterministic = {1 if self.solver.deterministic else 0}\n"
                f"run_obj = {self._get_performance_measure()}\n"
                f"cutoffTime = {self.solver_cutoff_time}\n"
                f"paramfile = {self.solver.get_pcs_file(pcs_port)}\n"
                f"outdir = {self.outdir_train}\n"
                f"instance_file = {self.instance_file_path}\n"
                f"test_instance_file = {self.instance_file_path}\n"
            )
            if self.cutoff_length is not None:
                file.write(f"cutoff_length = {self.cutoff_length}\n")
            if self.max_iterations is not None:
                file.write(f"iteration-limit = {self.max_iterations}\n")
            if self.wallclock_time is not None:
                file.write(f"wallclock-limit = {self.wallclock_time}\n")
            if self.cpu_time is not None:
                file.write(f"cputime-limit = {self.cpu_time}\n")
            if self.solver_calls is not None:
                file.write(f"runcount-limit = {self.solver_calls}\n")
            if self.cli_cores is not None:
                # BUG FIX: the missing newline fused this option with the next line
                file.write(f"cli-cores = {self.cli_cores}\n")
            if self.feature_data is not None:
                file.write(f"feature_file = {self.feature_file_path}\n")
            if self.use_cpu_time_in_tunertime is not None:
                file.write(
                    f"use-cpu-time-in-tunertime = {self.use_cpu_time_in_tunertime}\n"
                )
            # We don't let SMAC do the validation
            file.write("validation = false" + "\n")
        return self.scenario_file_path

    def _prepare_instances(self: SMAC2Scenario) -> None:
        """Create instance list file without instance specifics."""
        self.instance_file_path.parent.mkdir(exist_ok=True, parents=True)
        with self.instance_file_path.open("w+") as file:
            for instance_path in self.instance_set._instance_paths:
                file.write(f"{instance_path}\n")

    def _create_feature_file(self: SMAC2Scenario) -> None:
        """Create CSV file from feature data."""
        self.feature_data.to_csv(self.feature_file_path, index_label="INSTANCE_NAME")

    def _get_performance_measure(self: SMAC2Scenario) -> str:
        """Retrieve the performance measure of the SparkleObjective.

        Returns:
            Performance measure of the sparkle objective
        """
        if self.sparkle_objective.time:
            return "RUNTIME"
        return "QUALITY"

    def serialise(self: SMAC2Scenario) -> dict:
        """Transform ConfigurationScenario to dictionary format."""
        return {
            "number_of_runs": self.number_of_runs,
            "solver_calls": self.solver_calls,
            "cpu_time": self.cpu_time,
            "wallclock_time": self.wallclock_time,
            "solver_cutoff_time": self.solver_cutoff_time,
            "cutoff_length": self.cutoff_length,
            "max_iterations": self.max_iterations,
            "sparkle_objective": self.sparkle_objective.name,
            "feature_data": str(self.feature_file_path),
            "use_cpu_time_in_tunertime": self.use_cpu_time_in_tunertime,
        }

    @staticmethod
    def from_file(scenario_file: Path) -> SMAC2Scenario:
        """Reads scenario file and initalises SMAC2Scenario."""
        config = {
            keyvalue[0]: keyvalue[1]
            for keyvalue in (
                line.strip().split(" = ", maxsplit=1)
                for line in scenario_file.open().readlines()
                if line.strip() != ""
            )
        }

        # Collect relevant settings
        # BUG FIX: read "cputime-limit", the key create_scenario_file writes
        # (previously "cpu_time", which never occurs in the file)
        cpu_time = int(config["cputime-limit"]) if "cputime-limit" in config else None
        wallclock_limit = (
            int(config["wallclock-limit"]) if "wallclock-limit" in config else None
        )
        solver_calls = (
            int(config["runcount-limit"]) if "runcount-limit" in config else None
        )
        max_iterations = (
            int(config["iteration-limit"]) if "iteration-limit" in config else None
        )
        # BUG FIX: key name matched "use-cputime-in-tunertime" while the writer
        # emits "use-cpu-time-in-tunertime", so the value was always lost
        use_cpu_time_in_tunertime = (
            config["use-cpu-time-in-tunertime"]
            if "use-cpu-time-in-tunertime" in config
            else None
        )
        cli_cores = config["cli-cores"] if "cli-cores" in config else None
        # cutoffTime is always written, but may hold "None"
        solver_cutoff_time = (
            int(config["cutoffTime"])
            if config.get("cutoffTime", "None") != "None"
            else None
        )
        # BUG FIX: cutoff_length is optional in the file; avoid KeyError
        cutoff_length = config.get("cutoff_length")

        _, solver_path, _, objective_str = config["algo"].split(" ")
        objective = resolve_objective(objective_str)
        solver = Solver(Path(solver_path.strip()))
        # Extract the instance set from the instance file
        instance_file_path = Path(config["instance_file"])
        instance_set_path = Path(instance_file_path.open().readline().strip()).parent
        instance_set = Instance_Set(Path(instance_set_path))
        results_folder = scenario_file.parent / "results"
        # One result file per configuration run
        state_run_files = (
            [p for p in results_folder.iterdir() if p.is_file()]
            if results_folder.exists()
            else []
        )
        number_of_runs = len(state_run_files)
        feature_data_path = None
        if "feature_file" in config:
            feature_data_path = Path(config["feature_file"])
        # Get the timestamp from the scenario dir name
        timestamp = scenario_file.parent.name.split("_")[-1]
        return SMAC2Scenario(
            solver,
            instance_set,
            [objective],
            number_of_runs,
            instance_file_path.parent.parent,
            solver_calls,
            max_iterations,
            cpu_time,
            wallclock_limit,
            solver_cutoff_time,
            cutoff_length,
            cli_cores,
            use_cpu_time_in_tunertime,
            feature_data_path,
            timestamp,
        )