Coverage for sparkle/configurator/implementations/smac2.py: 74%
194 statements
coverage.py v7.9.1, created at 2025-07-01 13:21 +0000
1"""Configurator classes to implement SMAC2 in Sparkle."""
2from __future__ import annotations
3from pathlib import Path
4import glob
5import shutil
6import math
8import pandas as pd
10from runrunner import Runner, Run
12from sparkle.tools.parameters import PCSConvention
13from sparkle.configurator.configurator import Configurator, ConfigurationScenario
14from sparkle.solver import Solver
15from sparkle.structures import PerformanceDataFrame, FeatureDataFrame
16from sparkle.instance import InstanceSet, Instance_Set
17from sparkle.types import SparkleObjective, resolve_objective


class SMAC2(Configurator):
    """Class for SMAC2 (Java) configurator."""
    configurator_path = Path(__file__).parent.resolve() / "SMAC2"
    configurator_executable = configurator_path / "smac"
    configurator_target = configurator_path / "smac2_target_algorithm.py"

    full_name = "Sequential Model-based Algorithm Configuration"
    version = "2.10.03"

    def __init__(self: SMAC2) -> None:
        """Initialise the SMAC2 configurator, Java SMAC v2.10.03."""
        super().__init__(multi_objective_support=False)

    @property
    def name(self: SMAC2) -> str:
        """Returns the name of the configurator."""
        return SMAC2.__name__

    @staticmethod
    def scenario_class() -> ConfigurationScenario:
        """Returns the SMAC2 scenario class."""
        return SMAC2Scenario

    @staticmethod
    def check_requirements(verbose: bool = False) -> bool:
        """Check that SMAC2 is installed."""
        import warnings
        if no_java := shutil.which("java") is None:
            if verbose:
                warnings.warn(
                    "SMAC2 requires Java 1.8.0_402, but Java is not installed. "
                    "Please ensure Java is installed."
                )
        if no_smac := not SMAC2.configurator_executable.exists():
            if verbose:
                warnings.warn(
                    "SMAC2 executable not found. Please ensure SMAC2 is installed "
                    f"in the expected path ({SMAC2.configurator_path}).")
        return not (no_java or no_smac)

    @staticmethod
    def download_requirements(
        smac2_zip_url: str = "https://github.com/ADA-research/Sparkle/raw/refs/heads/"
                             "development/Resources/Configurators/SMAC2-v2.10.03.zip"
    ) -> None:
        """Download SMAC2."""
        if SMAC2.configurator_executable.exists():
            return  # Already installed
        from urllib.request import urlopen
        import zipfile
        import io
        r = urlopen(smac2_zip_url, timeout=60)
        z = zipfile.ZipFile(io.BytesIO(r.read()))
        z.extractall(SMAC2.configurator_path)
        # Ensure execution rights
        SMAC2.configurator_executable.chmod(0o755)
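
    # Illustrative usage sketch (not part of the original module): how a caller
    # might verify the SMAC2 installation and fetch it if missing, using only the
    # two static methods defined above. Kept as comments so the listing itself is
    # unchanged.
    #
    #     if not SMAC2.check_requirements(verbose=True):
    #         # Downloads the default SMAC2-v2.10.03.zip, unpacks it into
    #         # SMAC2.configurator_path and marks the executable as runnable.
    #         SMAC2.download_requirements()
    #         assert SMAC2.check_requirements()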

    def configure(self: SMAC2,
                  scenario: SMAC2Scenario,
                  data_target: PerformanceDataFrame,
                  validate_after: bool = True,
                  sbatch_options: list[str] = [],
                  slurm_prepend: str | list[str] | Path = None,
                  num_parallel_jobs: int = None,
                  base_dir: Path = None,
                  run_on: Runner = Runner.SLURM) -> list[Run]:
        """Start configuration job.

        Args:
            scenario: ConfigurationScenario object
            data_target: PerformanceDataFrame where to store the found configurations
            validate_after: Whether the configurations should be validated on the
                train set afterwards.
            sbatch_options: List of Slurm batch options to use.
            slurm_prepend: Slurm script to prepend to the sbatch script.
            num_parallel_jobs: The maximum number of jobs to run in parallel.
            base_dir: The path where the sbatch scripts will be created for Slurm.
            run_on: On which platform to run the jobs. Default: Slurm.

        Returns:
            A list of RunRunner Run objects.
        """
        scenario.create_scenario()
        configuration_ids = scenario.configuration_ids
        # TODO: Setting seeds like this is weird and should be inspected.
        # It could be good to take perhaps a seed from the scenario and use that
        # to generate a seed per run
        seeds = list(range(scenario.number_of_runs))
        output = [f"{scenario.results_directory.absolute()}/"
                  f"{scenario.name}_{config_id}_smac.txt"
                  for config_id in configuration_ids]
        cmds = [f"python3 {Configurator.configurator_cli_path.absolute()} "
                f"{SMAC2.__name__} {output_file} {data_target.csv_filepath} "
                f"{scenario.scenario_file_path} {configuration_id} "
                f"{SMAC2.configurator_executable.absolute()} "
                f"--scenario-file {scenario.scenario_file_path} "
                f"--seed {seed} "
                for output_file, configuration_id, seed
                in zip(output, configuration_ids, seeds)]
        if num_parallel_jobs is not None:
            num_parallel_jobs = max(num_parallel_jobs, len(cmds))
        return super().configure(
            configuration_commands=cmds,
            data_target=data_target,
            output=output,
            num_parallel_jobs=num_parallel_jobs,
            scenario=scenario,
            configuration_ids=configuration_ids,
            validate_after=validate_after,
            sbatch_options=sbatch_options,
            slurm_prepend=slurm_prepend,
            base_dir=base_dir,
            run_on=run_on
        )
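
    # Illustrative usage sketch (not part of the original module): launching a
    # configuration job locally. The scenario arguments and performance_data are
    # hypothetical values; see SMAC2Scenario below for the full parameter list.
    #
    #     scenario = SMAC2Scenario(solver, instance_set, [objective],
    #                              number_of_runs=5,
    #                              parent_directory=Path("Output"),
    #                              solver_cutoff_time=60, wallclock_time=600)
    #     runs = SMAC2().configure(scenario, data_target=performance_data,
    #                              validate_after=False, run_on=Runner.LOCAL)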

    @staticmethod
    def organise_output(output_source: Path,
                        output_target: Path,
                        scenario: SMAC2Scenario,
                        configuration_id: str) -> None | dict:
        """Retrieves the configuration from the SMAC2 output file and saves it."""
        call_key = SMAC2.configurator_target.name
        # The last line describing a call is the best found configuration
        for line in reversed(output_source.open("r").readlines()):
            if call_key in line:
                call_str = line.split(call_key, maxsplit=1)[1].strip()
                # The configuration is everything after the first 8 arguments
                configuration = call_str.split(" ", 8)[-1]
                break
        configuration = Solver.config_str_to_dict(configuration)
        configuration["configuration_id"] = configuration_id
        return Configurator.save_configuration(scenario, configuration_id,
                                               configuration, output_target)
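
    # Illustrative sketch (not part of the original module): the line-parsing step
    # above in isolation. The log line below is a fabricated example of the shape
    # assumed here: the target script name, eight call arguments, and then the
    # parameter string that config_str_to_dict receives.
    #
    #     call_key = "smac2_target_algorithm.py"
    #     line = ("... smac2_target_algorithm.py solver_dir tmp obj inst spec "
    #             "60.0 0 42 -alpha '0.5' -beta '2'")
    #     call_str = line.split(call_key, maxsplit=1)[1].strip()
    #     configuration = call_str.split(" ", 8)[-1]   # "-alpha '0.5' -beta '2'"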

    @staticmethod
    def get_smac_run_obj(objective: SparkleObjective) -> str:
        """Return the SMAC2 run objective for the given objective.

        Returns:
            "RUNTIME" for time-based objectives, "QUALITY" otherwise.
        """
        if objective.time:
            return "RUNTIME"
        return "QUALITY"

    def get_status_from_logs(self: SMAC2, base_dir: Path) -> None:
        """Method to scan the log files of the configurator for warnings."""
        if not base_dir.exists():
            return
        print(f"Checking the log files of configurator {type(self).__name__} for "
              "warnings...")
        scenarios = [f for f in base_dir.iterdir() if f.is_dir()]
        for scenario in scenarios:
            log_dir = scenario / "outdir_train_configuration" \
                / (scenario.name + "_scenario")
            warn_files = glob.glob(str(log_dir) + "/log-warn*")
            non_empty = [log_file for log_file in warn_files
                         if Path(log_file).stat().st_size > 0]
            if len(non_empty) > 0:
                print(f"Scenario {scenario.name} has {len(non_empty)} warning(s), see "
                      "the following log file(s) for more information:")
                for log_file in non_empty:
                    print(f"\t-{log_file}")
            else:
                print(f"Scenario {scenario.name} has no warnings.")


class SMAC2Scenario(ConfigurationScenario):
    """Class to handle SMAC2 configuration scenarios."""
    def __init__(self: SMAC2Scenario,
                 solver: Solver,
                 instance_set: InstanceSet,
                 sparkle_objectives: list[SparkleObjective],
                 number_of_runs: int,
                 parent_directory: Path,
                 solver_calls: int = None,
                 max_iterations: int = None,
                 cpu_time: int = None,
                 wallclock_time: int = None,
                 solver_cutoff_time: int = None,
                 target_cutoff_length: str = None,
                 cli_cores: int = None,
                 use_cpu_time_in_tunertime: bool = None,
                 feature_data: FeatureDataFrame | Path = None) -> None:
        """Initialize scenario paths and names.

        Args:
            solver: Solver that should be configured.
            instance_set: Instances object for the scenario.
            sparkle_objectives: SparkleObjectives used for each run of the
                configuration. Will be simplified to the first objective.
            number_of_runs: The number of configurator runs to perform
                for configuring the solver.
            parent_directory: Directory in which the scenario should be created.
            solver_calls: The number of times the solver is called for each
                configuration run.
            max_iterations: The maximum number of iterations allowed for each
                configuration run. [iteration-limit, numIterations, numberOfIterations]
            cpu_time: The CPU time budget allocated for each configuration run.
            wallclock_time: The wallclock time budget allocated for each
                configuration run.
            solver_cutoff_time: The maximum time allowed for each solver call
                during configuration.
            target_cutoff_length: A domain specific measure of when the algorithm
                should consider itself done.
            cli_cores: The number of cores to use to execute runs.
                Defaults in SMAC2 to 1.
            use_cpu_time_in_tunertime: Whether to count SMAC2's own used CPU time
                towards the configuration budget. Defaults in SMAC2 to True.
            feature_data: If features are used, this contains the feature data.
                If it is a FeatureDataFrame, values are converted to SMAC2 format.
                If it is a Path, the path is passed to SMAC2 as-is.
                Defaults to None.
        """
        super().__init__(solver, instance_set, sparkle_objectives,
                         number_of_runs, parent_directory)
        self.solver = solver
        self.instance_set = instance_set

        self.sparkle_objective = sparkle_objectives[0]
        self.solver_calls = solver_calls
        self.cpu_time = cpu_time
        self.wallclock_time = wallclock_time
        self.solver_cutoff_time = solver_cutoff_time
        self.cutoff_length = target_cutoff_length
        self.max_iterations = max_iterations
        self.cli_cores = cli_cores
        self.use_cpu_time_in_tunertime = use_cpu_time_in_tunertime

        self.feature_data = feature_data
        self.feature_file_path = None
        if self.feature_data:
            if isinstance(self.feature_data, FeatureDataFrame):
                # Convert feature data to SMAC2 format
                data_dict = {}
                for instance in self.instance_set.instance_paths:
                    data_dict[str(instance)] = feature_data.get_instance(str(instance))

                self.feature_data = pd.DataFrame.from_dict(
                    data_dict, orient="index",
                    columns=[f"Feature{index + 1}"
                             for index in range(feature_data.num_features)])

                def map_nan(x: str) -> float:
                    """Map non-numeric and NaN values to -512 (pre-defined by SMAC2)."""
                    try:
                        value = float(x)
                    except Exception:
                        return -512.0
                    return -512.0 if math.isnan(value) else value

                self.feature_data = self.feature_data.map(map_nan)
                self.feature_file_path = \
                    self.directory / f"{self.instance_set.name}_features.csv"
            elif isinstance(self.feature_data, Path):  # Read from Path
                self.feature_file_path = feature_data
                self.feature_data = pd.read_csv(self.feature_file_path,
                                                index_col=0)
            else:
                print(f"WARNING: Feature data is of type {type(feature_data)}. "
                      "Expected FeatureDataFrame or Path.")

        # Scenario Paths
        self.instance_file_path = self.directory / f"{self.instance_set.name}.txt"

        # SMAC2 Specific
        self.outdir_train = self.directory / "outdir_train_configuration"
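
    # Illustrative sketch (not part of the original module): what the feature-data
    # conversion above produces. Instance names and feature values are fabricated;
    # map_nan is the helper defined in __init__ above, and -512.0 is the SMAC2
    # placeholder it uses for missing or non-numeric values.
    #
    #     raw = {"Instances/PTN/p1.cnf": [3.5, float("nan")],
    #            "Instances/PTN/p2.cnf": [1.0, "err"]}
    #     df = pd.DataFrame.from_dict(raw, orient="index",
    #                                 columns=["Feature1", "Feature2"])
    #     df = df.map(map_nan)   # NaN and "err" both become -512.0
    #     df.to_csv("PTN_features.csv", index_label="INSTANCE_NAME")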

    @property
    def configurator(self: SMAC2Scenario) -> SMAC2:
        """Return the type of configurator the scenario belongs to."""
        return SMAC2

    def create_scenario(self: SMAC2Scenario) -> None:
        """Create scenario with solver and instances in the parent directory.

        This prepares all the necessary subdirectories related to configuration.
        """
        # Prepare scenario directory
        shutil.rmtree(self.directory, ignore_errors=True)
        self.directory.mkdir(parents=True)
        # Create empty directories as needed
        self.outdir_train.mkdir()
        self.tmp.mkdir()
        self.validation.mkdir()
        self.results_directory.mkdir(parents=True)  # Prepare results directory

        self._prepare_instances()

        if self.feature_data is not None:
            self._create_feature_file()

        self.create_scenario_file()

    def create_scenario_file(
            self: SMAC2Scenario,
            configurator_target: Path = SMAC2.configurator_target,
            pcs_port: PCSConvention = PCSConvention.SMAC) -> Path:
        """Create a file with the configuration scenario.

        Writes supplementary information to the target algorithm (algo =) as:
        algo = {configurator_target} {solver_directory} {tmp_directory}
        {sparkle_objective}
        """
        super().create_scenario_file()
        with self.scenario_file_path.open("w") as file:
            file.write(f"algo = {configurator_target.absolute()} "
                       f"{self.solver.directory} {self.tmp} {self.sparkle_objective} \n"
                       f"deterministic = {1 if self.solver.deterministic else 0}\n"
                       f"run_obj = {self._get_performance_measure()}\n"
                       f"cutoffTime = {self.solver_cutoff_time}\n"
                       f"paramfile = {self.solver.get_pcs_file(pcs_port)}\n"
                       f"outdir = {self.outdir_train}\n"
                       f"instance_file = {self.instance_file_path}\n"
                       f"test_instance_file = {self.instance_file_path}\n")
            if self.cutoff_length is not None:
                file.write(f"cutoff_length = {self.cutoff_length}\n")
            if self.max_iterations is not None:
                file.write(f"iteration-limit = {self.max_iterations}\n")
            if self.wallclock_time is not None:
                file.write(f"wallclock-limit = {self.wallclock_time}\n")
            if self.cpu_time is not None:
                file.write(f"cputime-limit = {self.cpu_time}\n")
            if self.solver_calls is not None:
                file.write(f"runcount-limit = {self.solver_calls}\n")
            if self.cli_cores is not None:
                file.write(f"cli-cores = {self.cli_cores}\n")
            if self.feature_data is not None:
                file.write(f"feature_file = {self.feature_file_path}\n")
            if self.use_cpu_time_in_tunertime is not None:
                file.write("use-cpu-time-in-tunertime = "
                           f"{self.use_cpu_time_in_tunertime}\n")
            # We don't let SMAC do the validation
            file.write("validation = false\n")
        return self.scenario_file_path
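
    # Illustrative sketch (not part of the original module): a scenario file as
    # written above might look roughly like the following. All paths and values
    # are fabricated; only the keys come from create_scenario_file.
    #
    #     algo = .../smac2_target_algorithm.py .../Solvers/PbO-CCSAT .../tmp PAR10
    #     deterministic = 0
    #     run_obj = RUNTIME
    #     cutoffTime = 60
    #     paramfile = .../Solvers/PbO-CCSAT/params.pcs
    #     outdir = .../outdir_train_configuration
    #     instance_file = .../PTN.txt
    #     test_instance_file = .../PTN.txt
    #     wallclock-limit = 600
    #     validation = false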

    def _prepare_instances(self: SMAC2Scenario) -> None:
        """Create instance list file without instance specifics."""
        self.instance_file_path.parent.mkdir(exist_ok=True, parents=True)
        with self.instance_file_path.open("w+") as file:
            for instance_path in self.instance_set._instance_paths:
                file.write(f"{instance_path}\n")

    def _create_feature_file(self: SMAC2Scenario) -> None:
        """Create CSV file from feature data."""
        self.feature_data.to_csv(self.feature_file_path,
                                 index_label="INSTANCE_NAME")

    def _get_performance_measure(self: SMAC2Scenario) -> str:
        """Retrieve the performance measure of the SparkleObjective.

        Returns:
            Performance measure of the sparkle objective.
        """
        if self.sparkle_objective.time:
            return "RUNTIME"
        return "QUALITY"

    def serialise(self: SMAC2Scenario) -> dict:
        """Transform ConfigurationScenario to dictionary format."""
        return {
            "number_of_runs": self.number_of_runs,
            "solver_calls": self.solver_calls,
            "cpu_time": self.cpu_time,
            "wallclock_time": self.wallclock_time,
            "solver_cutoff_time": self.solver_cutoff_time,
            "cutoff_length": self.cutoff_length,
            "max_iterations": self.max_iterations,
            "sparkle_objective": self.sparkle_objective.name,
            "feature_data": self.feature_file_path,
            "use_cpu_time_in_tunertime": self.use_cpu_time_in_tunertime
        }

    @staticmethod
    def from_file(scenario_file: Path) -> SMAC2Scenario:
        """Reads a scenario file and initialises a SMAC2Scenario."""
        config = {keyvalue[0]: keyvalue[1]
                  for keyvalue in (line.strip().split(" = ", maxsplit=1)
                                   for line in scenario_file.open().readlines()
                                   if line.strip() != "")}

        # Collect relevant settings; keys match those written by create_scenario_file
        cpu_time = int(config["cputime-limit"]) if "cputime-limit" in config else None
        wallclock_limit = int(config["wallclock-limit"]) if "wallclock-limit" in config \
            else None
        solver_calls = int(config["runcount-limit"]) if "runcount-limit" in config \
            else None
        max_iterations = int(config["iteration-limit"]) if "iteration-limit" in config \
            else None
        use_cpu_time_in_tunertime = config["use-cpu-time-in-tunertime"] \
            if "use-cpu-time-in-tunertime" in config else None
        cli_cores = config["cli-cores"] if "cli-cores" in config else None

        _, solver_path, _, objective_str = config["algo"].split(" ")
        objective = resolve_objective(objective_str)
        solver = Solver(Path(solver_path.strip()))
        # Extract the instance set from the instance file
        instance_file_path = Path(config["instance_file"])
        instance_set_path = Path(instance_file_path.open().readline().strip()).parent
        instance_set = Instance_Set(Path(instance_set_path))
        results_folder = scenario_file.parent / "results"
        run_result_files = [p for p in results_folder.iterdir() if p.is_file()]
        number_of_runs = len(run_result_files)
        feature_data_path = None
        if "feature_file" in config:
            feature_data_path = Path(config["feature_file"])
        return SMAC2Scenario(solver,
                             instance_set,
                             [objective],
                             number_of_runs,
                             instance_file_path.parent.parent,
                             solver_calls,
                             max_iterations,
                             cpu_time,
                             wallclock_limit,
                             int(config["cutoffTime"]),
                             config.get("cutoff_length"),
                             cli_cores,
                             use_cpu_time_in_tunertime,
                             feature_data_path)
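
# Illustrative usage sketch (not part of the original module): a scenario
# round-trip under hypothetical paths. create_scenario() writes the scenario file;
# from_file() rebuilds an equivalent SMAC2Scenario from it.
#
#     scenario = SMAC2Scenario(Solver(Path("Solvers/PbO-CCSAT")),
#                              Instance_Set(Path("Instances/PTN")),
#                              [resolve_objective("PAR10")],
#                              number_of_runs=2,
#                              parent_directory=Path("Output/configuration"),
#                              solver_cutoff_time=60, wallclock_time=600)
#     scenario.create_scenario()
#     reloaded = SMAC2Scenario.from_file(scenario.scenario_file_path)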