1"""Configurator classes to implement SMAC2 in Sparkle."""
3from __future__ import annotations
4from pathlib import Path
5import shutil
6import math
7import random
9import pandas as pd
11from runrunner import Runner, Run
13from sparkle.tools.parameters import PCSConvention
14from sparkle.configurator.configurator import Configurator, ConfigurationScenario
15from sparkle.solver import Solver
16from sparkle.structures import PerformanceDataFrame, FeatureDataFrame
17from sparkle.instance import InstanceSet, Instance_Set
18from sparkle.types import SparkleObjective, resolve_objective


class SMAC2(Configurator):
    """Class for SMAC2 (Java) configurator."""

    configurator_path = Path(__file__).parent.resolve() / "SMAC2"
    configurator_executable = configurator_path / "smac"
    configurator_target = configurator_path / "smac2_target_algorithm.py"

    full_name = "Sequential Model-based Algorithm Configuration"
    version = "2.10.03"

    def __init__(self: SMAC2) -> None:
        """Returns the SMAC2 configurator, Java SMAC V2.10.03."""
        return super().__init__(multi_objective_support=False)

    @property
    def name(self: SMAC2) -> str:
        """Returns the name of the configurator."""
        return SMAC2.__name__

    @staticmethod
    def scenario_class() -> ConfigurationScenario:
        """Returns the SMAC2 scenario class."""
        return SMAC2Scenario

    @staticmethod
    def check_requirements(verbose: bool = False) -> bool:
        """Check that SMAC2 is installed."""
        import warnings

        if no_java := shutil.which("java") is None:
            if verbose:
                warnings.warn(
                    "SMAC2 requires Java 1.8.0_402, but Java is not installed. "
                    "Please ensure Java is installed."
                )
        if no_smac := not SMAC2.configurator_executable.exists():
            if verbose:
                warnings.warn(
                    "SMAC2 executable not found. Please ensure SMAC2 is installed "
                    f"in the expected Path ({SMAC2.configurator_path})."
                )
        return not (no_java or no_smac)

    @staticmethod
    def download_requirements(
        smac2_zip_url: str = "https://github.com/ADA-research/Sparkle/raw/refs/heads/"
        "development/Resources/Configurators/SMAC2-v2.10.03.zip",
    ) -> None:
        """Download SMAC2."""
        if SMAC2.configurator_executable.exists():
            return  # Already installed
        from urllib.request import urlopen
        import zipfile
        import io

        r = urlopen(smac2_zip_url, timeout=60)
        z = zipfile.ZipFile(io.BytesIO(r.read()))
        z.extractall(SMAC2.configurator_path)
        # Ensure execution rights
        SMAC2.configurator_executable.chmod(0o755)
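
    # Minimal usage sketch (illustrative, not part of the original module):
    #   if not SMAC2.check_requirements(verbose=True):
    #       SMAC2.download_requirements()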

    def configure(
        self: SMAC2,
        scenario: SMAC2Scenario,
        data_target: PerformanceDataFrame,
        validate_after: bool = True,
        sbatch_options: list[str] = [],
        slurm_prepend: str | list[str] | Path = None,
        num_parallel_jobs: int = None,
        base_dir: Path = None,
        run_on: Runner = Runner.SLURM,
    ) -> list[Run]:
        """Start configuration job.

        Args:
            scenario: ConfigurationScenario object
            data_target: PerformanceDataFrame where to store the found configurations
            validate_after: Whether the configurations should be validated on the
                train set afterwards.
            sbatch_options: List of slurm batch options to use.
            slurm_prepend: Slurm script to prepend to the sbatch.
            num_parallel_jobs: The maximum number of jobs to run in parallel.
            base_dir: The path where the sbatch scripts will be created for Slurm.
            run_on: On which platform to run the jobs. Default: Slurm.

        Returns:
            A list of RunRunner Run objects.
        """
        scenario.create_scenario()
        configuration_ids = scenario.configuration_ids
        # The maximum seed size for SMAC2 is 999 999 999
        seeds = [random.randint(0, 10**9 - 1) for _ in range(scenario.number_of_runs)]
        output = [
            f"{(scenario.results_directory).absolute()}/"
            f"{scenario.name}_{config_id}_smac.txt"
            for config_id in configuration_ids
        ]
        cmds = [
            f"python3 {Configurator.configurator_cli_path.absolute()} "
            f"{SMAC2.__name__} {output_file} {data_target.csv_filepath} "
            f"{scenario.scenario_file_path} {configuration_id} "
            f"{SMAC2.configurator_executable.absolute()} "
            f"--scenario-file {scenario.scenario_file_path} "
            f"--seed {seed} "
            for output_file, configuration_id, seed in zip(
                output, configuration_ids, seeds
            )
        ]
        if num_parallel_jobs is not None:
            num_parallel_jobs = max(num_parallel_jobs, len(cmds))
        return super().configure(
            configuration_commands=cmds,
            data_target=data_target,
            output=output,
            num_parallel_jobs=num_parallel_jobs,
            scenario=scenario,
            configuration_ids=configuration_ids,
            validate_after=validate_after,
            sbatch_options=sbatch_options,
            slurm_prepend=slurm_prepend,
            base_dir=base_dir,
            run_on=run_on,
        )
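
    # For illustration: each generated command wraps the SMAC2 call in Sparkle's
    # configurator CLI; roughly (placeholders, not literal paths):
    #   python3 <configurator_cli_path> SMAC2 <output_file> <performance_data.csv> \
    #       <scenario_file> <configuration_id> <smac_executable> \
    #       --scenario-file <scenario_file> --seed <seed>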

    @staticmethod
    def organise_output(
        output_source: Path,
        output_target: Path,
        scenario: SMAC2Scenario,
        configuration_id: str,
    ) -> None | dict:
        """Retrieve the best found configuration from the SMAC2 output file and save it."""
        call_key = SMAC2.configurator_target.name
        # The last line describing a call is the best found configuration
        for line in reversed(output_source.open("r").readlines()):
            if call_key in line:
                call_str = line.split(call_key, maxsplit=1)[1].strip()
                # The configuration appears after the first 8 arguments
                configuration = call_str.split(" ", 8)[-1]
                break
        configuration = Solver.config_str_to_dict(configuration)
        configuration["configuration_id"] = configuration_id
        return Configurator.save_configuration(
            scenario, configuration_id, configuration, output_target
        )
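
    # Illustrative sketch of a parsed call line (hypothetical values), combining the
    # algo string written by SMAC2Scenario.create_scenario_file with SMAC2's usual
    # target-algorithm arguments:
    #   ... smac2_target_algorithm.py <solver_dir> <tmp_dir> <objective> <instance> \
    #       <specifics> <cutoff_time> <runlength> <seed> -param1 'value1' ...
    # Everything after the eighth argument is taken as the configuration string.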

    @staticmethod
    def get_smac_run_obj(objective: SparkleObjective) -> str:
        """Return the SMAC run objective based on the performance measure.

        Returns:
            A string that represents the run objective set in the settings.
        """
        if objective.time:
            return "RUNTIME"
        return "QUALITY"

    def get_status_from_logs(self: SMAC2, base_dir: Path) -> None:
        """Scan the log files of the configurator for warnings."""
        if not base_dir.exists():
            return
        print(
            f"Checking the log files of configurator {type(self).__name__} for "
            "warnings..."
        )
        scenarios = [f for f in base_dir.iterdir() if f.is_dir()]
        for scenario in scenarios:
            log_dir = (
                scenario / "outdir_train_configuration" / (scenario.name + "_scenario")
            )
            # Collect the paths of all non-empty log files
            warn_files = [f for f in log_dir.glob("log-warn*") if f.stat().st_size > 0]
            if len(warn_files) > 0:
                print(
                    f"Scenario {scenario.name} has {len(warn_files)} warning(s), see "
                    "the following log file(s) for more information:"
                )
                for log_file in warn_files:
                    print(f"\t-{log_file}")
            else:
                print(f"Scenario {scenario.name} has no warnings.")


class SMAC2Scenario(ConfigurationScenario):
    """Class to handle SMAC2 configuration scenarios."""

    def __init__(
        self: SMAC2Scenario,
        solver: Solver,
        instance_set: InstanceSet,
        sparkle_objectives: list[SparkleObjective],
        number_of_runs: int,
        parent_directory: Path,
        solver_calls: int = None,
        max_iterations: int = None,
        cpu_time: int = None,
        wallclock_time: int = None,
        solver_cutoff_time: int = None,
        target_cutoff_length: str = None,
        cli_cores: int = None,
        use_cpu_time_in_tunertime: bool = None,
        feature_data: FeatureDataFrame | Path = None,
        timestamp: str = None,
    ) -> None:
        """Initialize scenario paths and names.

        Args:
            solver: Solver that should be configured.
            instance_set: Instances object for the scenario.
            sparkle_objectives: SparkleObjectives used for each run of the
                configuration. Will be simplified to the first objective.
            number_of_runs: The number of configurator runs to perform
                for configuring the solver.
            parent_directory: Directory in which the scenario should be created.
            solver_calls: The number of times the solver is called for each
                configuration run.
            max_iterations: The maximum number of iterations allowed for each
                configuration run. [iteration-limit, numIterations, numberOfIterations]
            cpu_time: The CPU time budget allocated for each configuration run.
            wallclock_time: The wallclock time budget allocated for each
                configuration run.
            solver_cutoff_time: The maximum time allowed for each solver call during
                configuration.
            target_cutoff_length: A domain specific measure of when the algorithm
                should consider itself done.
            cli_cores: The number of cores to use to execute runs.
                Defaults in SMAC2 to 1.
            use_cpu_time_in_tunertime: Whether SMAC2's own used time is deducted from
                the budget. Defaults in SMAC2 to True.
            feature_data: If features are used, this contains the feature data.
                If it is a FeatureDataFrame, values will be converted to SMAC2 format.
                If it is a Path, the path will be passed to SMAC2.
                Defaults to None.
            timestamp: An optional timestamp for the directory name.
        """
        super().__init__(
            solver,
            instance_set,
            sparkle_objectives,
            number_of_runs,
            parent_directory,
            timestamp,
        )
        self.solver = solver
        self.instance_set = instance_set

        self.sparkle_objective = sparkle_objectives[0]
        self.solver_calls = solver_calls
        self.cpu_time = cpu_time
        self.wallclock_time = wallclock_time
        self.solver_cutoff_time = solver_cutoff_time
        self.cutoff_length = target_cutoff_length
        self.max_iterations = max_iterations
        self.cli_cores = cli_cores
        self.use_cpu_time_in_tunertime = use_cpu_time_in_tunertime

        self.feature_data = feature_data
        self._feature_file_path = None
        if self.feature_data:
            if isinstance(self.feature_data, FeatureDataFrame):
                # Convert feature data to SMAC2 format
                data_dict = {}
                for instance in self.instance_set.instance_paths:
                    data_dict[str(instance)] = feature_data.get_instance(str(instance))

                self.feature_data = pd.DataFrame.from_dict(
                    data_dict,
                    orient="index",
                    columns=[
                        f"Feature{index + 1}"
                        for index in range(feature_data.num_features)
                    ],
                )

                def map_nan(x: float | str) -> float:
                    """Map non-numeric values to -512 (pre-defined by SMAC2)."""
                    try:
                        value = float(x)
                    except (TypeError, ValueError):
                        return -512.0
                    return -512.0 if math.isnan(value) else value

                self.feature_data = self.feature_data.map(map_nan)
            elif isinstance(self.feature_data, Path):  # Read from Path
                self._feature_file_path = feature_data
                self.feature_data = pd.read_csv(self.feature_file_path, index_col=0)
            else:
                print(
                    f"WARNING: Feature data is of type {type(feature_data)}. "
                    "Expected FeatureDataFrame or Path."
                )

    @property
    def instance_file_path(self: SMAC2Scenario) -> Path:
        """Return the path of the instance file."""
        if self.directory:
            return self.directory / f"{self.instance_set.name}.txt"
        return None

    @property
    def outdir_train(self: SMAC2Scenario) -> Path:
        """Return the path of the train out directory."""
        # SMAC2 specific directory
        if self.directory:
            return self.directory / "outdir_train_configuration"
        return None

    @property
    def feature_file_path(self: SMAC2Scenario) -> Path:
        """Return the path of the feature file."""
        if self._feature_file_path:
            return self._feature_file_path
        elif self.directory:
            return self.directory / f"{self.instance_set.name}_features.csv"
        else:
            return None

    @property
    def configurator(self: SMAC2Scenario) -> SMAC2:
        """Return the type of configurator the scenario belongs to."""
        return SMAC2

    def create_scenario(self: SMAC2Scenario) -> None:
        """Create the scenario with solver and instances in the parent directory.

        This prepares all the necessary subdirectories related to configuration.
        """
        super().create_scenario()
        self.outdir_train.mkdir()
        self._prepare_instances()

        if self.feature_data is not None:
            self._create_feature_file()

        self.create_scenario_file()

    def create_scenario_file(
        self: SMAC2Scenario,
        configurator_target: Path = SMAC2.configurator_target,
        pcs_port: PCSConvention = PCSConvention.SMAC,
    ) -> Path:
        """Create a file with the configuration scenario.

        Writes supplementary information to the target algorithm (algo =) as:
        algo = {configurator_target} {solver_directory} {tmp_directory}
               {sparkle_objective}
        """
        with self.scenario_file_path.open("w") as file:
            file.write(
                f"algo = {configurator_target.absolute()} "
                f"{self.solver.directory} {self.tmp} {self.sparkle_objective} \n"
                f"deterministic = {1 if self.solver.deterministic else 0}\n"
                f"run_obj = {self._get_performance_measure()}\n"
                f"cutoffTime = {self.solver_cutoff_time}\n"
                f"paramfile = {self.solver.get_pcs_file(pcs_port)}\n"
                f"outdir = {self.outdir_train}\n"
                f"instance_file = {self.instance_file_path}\n"
                f"test_instance_file = {self.instance_file_path}\n"
            )
            if self.cutoff_length is not None:
                file.write(f"cutoff_length = {self.cutoff_length}\n")
            if self.max_iterations is not None:
                file.write(f"iteration-limit = {self.max_iterations}\n")
            if self.wallclock_time is not None:
                file.write(f"wallclock-limit = {self.wallclock_time}\n")
            if self.cpu_time is not None:
                file.write(f"cputime-limit = {self.cpu_time}\n")
            if self.solver_calls is not None:
                file.write(f"runcount-limit = {self.solver_calls}\n")
            if self.cli_cores is not None:
                file.write(f"cli-cores = {self.cli_cores}\n")
            if self.feature_data is not None:
                file.write(f"feature_file = {self.feature_file_path}\n")
            if self.use_cpu_time_in_tunertime is not None:
                file.write(
                    f"use-cpu-time-in-tunertime = {self.use_cpu_time_in_tunertime}\n"
                )
            # We do not let SMAC do the validation
            file.write("validation = false\n")
        return self.scenario_file_path
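
    # For illustration only, a scenario file produced by create_scenario_file could
    # look roughly as follows (paths and values are hypothetical):
    #   algo = .../smac2_target_algorithm.py .../Solvers/MySolver .../tmp PAR10
    #   deterministic = 0
    #   run_obj = RUNTIME
    #   cutoffTime = 60
    #   paramfile = .../Solvers/MySolver/parameters.pcs
    #   outdir = .../outdir_train_configuration
    #   instance_file = .../MyInstanceSet.txt
    #   test_instance_file = .../MyInstanceSet.txt
    #   wallclock-limit = 600
    #   validation = false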

    def _prepare_instances(self: SMAC2Scenario) -> None:
        """Create the instance list file without instance specifics."""
        self.instance_file_path.parent.mkdir(exist_ok=True, parents=True)
        with self.instance_file_path.open("w+") as file:
            for instance_path in self.instance_set._instance_paths:
                file.write(f"{instance_path}\n")

    def _create_feature_file(self: SMAC2Scenario) -> None:
        """Create a CSV file from the feature data."""
        self.feature_data.to_csv(self.feature_file_path, index_label="INSTANCE_NAME")
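
    # Illustrative sketch of the resulting feature CSV (column names follow the
    # Feature{n} scheme from __init__; instance paths and values are hypothetical):
    #   INSTANCE_NAME,Feature1,Feature2,Feature3
    #   Instances/MySet/instance_1.cnf,0.0,12.5,-512.0
    #   Instances/MySet/instance_2.cnf,1.0,3.25,7.0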

    def _get_performance_measure(self: SMAC2Scenario) -> str:
        """Retrieve the performance measure of the SparkleObjective.

        Returns:
            Performance measure of the sparkle objective.
        """
        if self.sparkle_objective.time:
            return "RUNTIME"
        return "QUALITY"

    def serialise(self: SMAC2Scenario) -> dict:
        """Transform the ConfigurationScenario to dictionary format."""
        return {
            "number_of_runs": self.number_of_runs,
            "solver_calls": self.solver_calls,
            "cpu_time": self.cpu_time,
            "wallclock_time": self.wallclock_time,
            "solver_cutoff_time": self.solver_cutoff_time,
            "cutoff_length": self.cutoff_length,
            "max_iterations": self.max_iterations,
            "sparkle_objective": self.sparkle_objective.name,
            "feature_data": str(self.feature_file_path),
            "use_cpu_time_in_tunertime": self.use_cpu_time_in_tunertime,
        }

    @staticmethod
    def from_file(scenario_file: Path) -> SMAC2Scenario:
        """Reads a scenario file and initialises a SMAC2Scenario."""
        config = {
            keyvalue[0]: keyvalue[1]
            for keyvalue in (
                line.strip().split(" = ", maxsplit=1)
                for line in scenario_file.open().readlines()
                if line.strip() != ""
            )
        }

        # Collect relevant settings (keys as written by create_scenario_file)
        cpu_time = (
            int(config["cputime-limit"]) if "cputime-limit" in config else None
        )
        wallclock_limit = (
            int(config["wallclock-limit"]) if "wallclock-limit" in config else None
        )
        solver_calls = (
            int(config["runcount-limit"]) if "runcount-limit" in config else None
        )
        max_iterations = (
            int(config["iteration-limit"]) if "iteration-limit" in config else None
        )
        use_cpu_time_in_tunertime = (
            config["use-cpu-time-in-tunertime"]
            if "use-cpu-time-in-tunertime" in config
            else None
        )
        cli_cores = config["cli-cores"] if "cli-cores" in config else None

        _, solver_path, _, objective_str = config["algo"].split(" ")
        objective = resolve_objective(objective_str)
        solver = Solver(Path(solver_path.strip()))
        # Extract the instance set from the instance file
        instance_file_path = Path(config["instance_file"])
        instance_set_path = Path(instance_file_path.open().readline().strip()).parent
        instance_set = Instance_Set(Path(instance_set_path))
        results_folder = scenario_file.parent / "results"
        result_files = [p for p in results_folder.iterdir() if p.is_file()]
        number_of_runs = len(result_files)
        feature_data_path = None
        if "feature_file" in config:
            feature_data_path = Path(config["feature_file"])
        # Get the timestamp from the scenario directory name
        timestamp = scenario_file.parent.name.split("_")[-1]
        return SMAC2Scenario(
            solver,
            instance_set,
            [objective],
            number_of_runs,
            instance_file_path.parent.parent,
            solver_calls,
            max_iterations,
            cpu_time,
            wallclock_limit,
            int(config["cutoffTime"]),
            config.get("cutoff_length"),
            cli_cores,
            use_cpu_time_in_tunertime,
            feature_data_path,
            timestamp,
        )
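
# Minimal end-to-end sketch (illustrative only, not part of the original module).
# It assumes a Solver directory, an instance set and a PerformanceDataFrame
# (performance_df) prepared elsewhere; the objective name "PAR10" is an assumption.
#   solver = Solver(Path("Solvers/MySolver"))
#   train_set = Instance_Set(Path("Instances/MySet"))
#   objective = resolve_objective("PAR10")
#   scenario = SMAC2Scenario(solver, train_set, [objective], number_of_runs=5,
#                            parent_directory=Path("Output/configuration"),
#                            wallclock_time=600, solver_cutoff_time=60)
#   runs = SMAC2().configure(scenario, data_target=performance_df,
#                            run_on=Runner.SLURM)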