Coverage for sparkle/configurator/implementations/paramils.py: 68%
139 statements
« prev ^ index » next coverage.py v7.8.0, created at 2025-04-03 10:42 +0000
1"""Configurator classes for the ParamILS configurator."""
2from __future__ import annotations
3from pathlib import Path
4import shutil
6from runrunner import Runner, Run
8from sparkle.configurator.configurator import Configurator
9from sparkle.configurator.implementations.smac2 import SMAC2Scenario
10from sparkle.solver import Solver
11from sparkle.structures import PerformanceDataFrame, FeatureDataFrame
12from sparkle.instance import InstanceSet
13from sparkle.types import SparkleObjective
class ParamILS(Configurator):
    """Class for the ParamILS (Java) configurator."""
    # Location of the bundled ParamILS v3.0.0 component and its entry points.
    configurator_path = Path(__file__).parent.parent.parent.resolve() /\
        "Components/paramils-v3.0.0"
    configurator_executable = configurator_path / "paramils"
    target_algorithm = "paramils_target_algorithm.py"
    configurator_target = configurator_path / target_algorithm

    version = "3.0.0"
    full_name = "Parameter Iterated Local Search"

    def __init__(self: ParamILS,
                 base_dir: Path,
                 output_path: Path) -> None:
        """Returns the ParamILS (Java) configurator, V3.0.0.

        Args:
            base_dir: The path where the configurator will be executed in.
            output_path: The path where the output will be placed.
        """
        # All ParamILS output lives in its own subdirectory of output_path.
        output_path = output_path / ParamILS.__name__
        output_path.mkdir(parents=True, exist_ok=True)
        return super().__init__(
            output_path=output_path,
            base_dir=base_dir,
            tmp_path=output_path / "tmp",
            multi_objective_support=False)

    @property
    def name(self: ParamILS) -> str:
        """Returns the name of the configurator."""
        return ParamILS.__name__

    @staticmethod
    def scenario_class() -> ParamILSScenario:
        """Returns the ParamILS scenario class."""
        return ParamILSScenario

    def configure(self: ParamILS,
                  scenario: ParamILSScenario,
                  data_target: PerformanceDataFrame,
                  validate_after: bool = True,
                  sbatch_options: list[str] = None,
                  slurm_prepend: str | list[str] | Path = None,
                  num_parallel_jobs: int = None,
                  base_dir: Path = None,
                  run_on: Runner = Runner.SLURM) -> list[Run]:
        """Start configuration job.

        Args:
            scenario: ConfigurationScenario object
            data_target: PerformanceDataFrame where to store the found configurations
            validate_after: Whether the Validator will be called after the configuration
            sbatch_options: List of slurm batch options to use.
                Defaults to an empty list.
            slurm_prepend: Slurm script to prepend to the sbatch
            num_parallel_jobs: The maximum number of jobs to run parallel.
            base_dir: The path where the sbatch scripts will be created for Slurm.
            run_on: On which platform to run the jobs. Default: Slurm.

        Returns:
            A RunRunner Run object.

        Raises:
            RuntimeError: If Java cannot be found on the system.
        """
        # Fix: avoid the mutable default argument ([]) anti-pattern;
        # normalise None to a fresh list so behaviour is unchanged.
        if sbatch_options is None:
            sbatch_options = []
        if shutil.which("java") is None:
            raise RuntimeError(
                "ParamILS requires Java 1.8.0_402, but Java is not installed. "
                "Please ensure Java is installed and try again."
            )
        scenario.create_scenario()
        # We set the seed over the last n run ids in the dataframe
        seeds = data_target.run_ids[data_target.num_runs - scenario.number_of_runs:]
        output = [f"{(scenario.results_directory).absolute()}/"
                  f"{scenario.name}_seed_{seed}_paramils.txt"
                  for seed in seeds]
        # NOTE: Could add --rungroup $dirname to change the created directory name
        cmds = [f"python3 {Configurator.configurator_cli_path.absolute()} "
                f"{ParamILS.__name__} {output_file} {data_target.csv_filepath} "
                f"{scenario.scenario_file_path} {seed} "
                f"{ParamILS.configurator_executable.absolute()} "
                f"--scenario-file {scenario.scenario_file_path} "
                f"--seed {seed} "
                for output_file, seed in zip(output, seeds)]
        if num_parallel_jobs is not None:
            # NOTE(review): max() raises the value to at least len(cmds), so a
            # user-supplied cap never constrains parallelism; if a hard cap is
            # intended this should be min(). Kept as-is for consistency with
            # the sibling configurator implementations — TODO confirm intent.
            num_parallel_jobs = max(num_parallel_jobs, len(cmds))
        return super().configure(
            configuration_commands=cmds,
            data_target=data_target,
            output=output,
            slurm_prepend=slurm_prepend,
            num_parallel_jobs=num_parallel_jobs,
            scenario=scenario,
            validation_ids=seeds if validate_after else None,
            sbatch_options=sbatch_options,
            base_dir=base_dir,
            run_on=run_on,
        )

    @staticmethod
    def organise_output(output_source: Path,
                        output_target: Path = None,
                        scenario: ParamILSScenario = None,
                        run_id: int = None) -> None | dict:
        """Retrieves configurations from ParamILS log files and places them in output.

        Parses the ParamILS run log for the parameter values that differ from
        the initial configuration. When ``output_target`` is missing or does
        not exist yet, the parsed configuration dict is returned directly;
        otherwise it is written into the PerformanceDataFrame at that path.

        Args:
            output_source: Path of the ParamILS log file to parse.
            output_target: PerformanceDataFrame CSV file to update, if any.
            scenario: The scenario the configuration belongs to.
            run_id: The run (seed) the configuration belongs to.

        Returns:
            The parsed configuration dict when no target is written, else None.
        """
        from filelock import FileLock
        # Extract from log file
        configuration = {}
        skipping = True
        for line in output_source.open().readlines():
            if skipping:
                # Skip the log preamble until the configuration-diff section.
                if "[INFO ] Differences with initial configuration:" in line:
                    skipping = False
                continue
            # Diff lines look like "<param>: <old> -> <new>"; anything else
            # marks the end of the section.
            if ":" not in line or "->" not in line:
                break
            variable = line.split(":")[0].strip()
            value = line.split("->")[1].strip()
            configuration[variable] = value
        if output_target is None or not output_target.exists():
            return configuration
        # The scenario file's mtime makes the configuration id unique per run.
        time_stamp = scenario.scenario_file_path.stat().st_mtime
        configuration["configuration_id"] =\
            f"{ParamILS.__name__}_{time_stamp}_{run_id}"
        instance_names = scenario.instance_set.instance_names
        # Lock guards against concurrent runs writing the same CSV.
        lock = FileLock(f"{output_target}.lock")
        with lock.acquire(timeout=60):
            performance_data = PerformanceDataFrame(output_target)
            # Resolve absolute path to Solver column
            solver = [s for s in performance_data.solvers
                      if Path(s).name == scenario.solver.name][0]
            # For some reason the instance paths in the instance set are absolute
            instances = [instance for instance in performance_data.instances
                         if Path(instance).name in instance_names]
            # We don't set the seed in the dataframe, as that should be part of the conf
            performance_data.set_value(
                value=[str(configuration)],
                solver=solver,
                instance=instances,
                objective=None,
                run=run_id,
                solver_fields=[PerformanceDataFrame.column_configuration]
            )
            performance_data.save_csv()

    def get_status_from_logs(self: ParamILS) -> None:
        """Method to scan the log files of the configurator for warnings."""
        return
class ParamILSScenario(SMAC2Scenario):
    """Class to handle ParamILS configuration scenarios."""

    def __init__(self: ParamILSScenario,
                 solver: Solver,
                 instance_set: InstanceSet,
                 sparkle_objectives: list[SparkleObjective],
                 parent_directory: Path,
                 number_of_runs: int = None,
                 solver_calls: int = None,
                 max_iterations: int = None,
                 cutoff_time: int = None,
                 cli_cores: int = None,
                 use_cpu_time_in_tunertime: bool = None,
                 feature_data: FeatureDataFrame | Path = None,
                 tuner_timeout: int = None,
                 focused_ils: bool = True,
                 initial_configurations: int = None,
                 min_runs: int = None,
                 max_runs: int = None,
                 random_restart: float = None,
                 )\
            -> None:
        """Initialize scenario paths and names.

        Args:
            solver: Solver that should be configured.
            instance_set: Instances object for the scenario.
            sparkle_objectives: SparkleObjectives used for each run of the configuration.
            parent_directory: Directory in which the scenario should be created.
            number_of_runs: The number of configurator runs to perform
                for configuring the solver.
            solver_calls: The number of times the solver is called for each
                configuration run
            max_iterations: The maximum number of iterations allowed for each
                configuration run. [iteration-limit, numIterations, numberOfIterations]
            cutoff_time: The maximum number of seconds allowed for each
                configuration run. [time-limit, cpu-time, wallclock-time]
            cli_cores: The maximum number of cores allowed for each
                configuration run.
            use_cpu_time_in_tunertime: Whether to use cpu_time in the tuner
                time limit.
            feature_data: The feature data for the instances in the scenario.
            tuner_timeout: The maximum number of seconds allowed for the tuner.
            focused_ils: Comparison approach of ParamILS.
                True for focused ILS, false for basic.
            initial_configurations: The number of initial configurations.
            min_runs: The minimum number of runs required for a single configuration.
            max_runs: The maximum number of runs allowed for a single configuration.
            random_restart: The probability to restart from a random configuration.
        """
        # Delegate the SMAC2-compatible part of the scenario to the parent;
        # the None arguments are SMAC2-only options ParamILS does not use.
        super().__init__(solver, instance_set, sparkle_objectives, parent_directory,
                         number_of_runs, solver_calls, max_iterations, None,
                         None, cutoff_time, None, cli_cores,
                         use_cpu_time_in_tunertime, feature_data)
        self.solver = solver
        self.instance_set = instance_set
        self.tuner_timeout = tuner_timeout
        self.multi_objective = len(sparkle_objectives) > 1  # Not using MO yet in Sparkle
        self.focused = focused_ils
        self.initial_configurations = initial_configurations
        self.min_runs = min_runs
        self.max_runs = max_runs
        self.random_restart = random_restart

    def create_scenario_file(self: ParamILSScenario) -> Path:
        """Create a file with the configuration scenario."""
        from sparkle.tools.parameters import PCSConvention
        scenario_file = super().create_scenario_file(ParamILS.configurator_target,
                                                     PCSConvention.ParamILS)
        # Append the ParamILS-specific options; only explicitly set values
        # are written so ParamILS falls back to its own defaults otherwise.
        with scenario_file.open("a") as fout:
            fout.write("check-instances-exist = True\n")
            if self.focused is not None:
                approach = "FOCUSED" if self.focused else "BASIC"
                fout.write(f"approach = {approach}\n")
            if self.initial_configurations:
                fout.write(f"R = {self.initial_configurations}\n")
            if self.min_runs:
                fout.write(f"min-runs = {self.min_runs}\n")
            if self.max_runs:
                fout.write(f"max-runs = {self.max_runs}\n")
            if self.random_restart:
                fout.write(f"random-restart = {self.random_restart}\n")
            if self.tuner_timeout:
                fout.write(f"tuner-timeout = {self.tuner_timeout}\n")
        return scenario_file

    @staticmethod
    def from_file(scenario_file: Path) -> ParamILSScenario:
        """Reads scenario file and initialises ConfigurationScenario."""
        from sparkle.types import resolve_objective
        from sparkle.instance import Instance_Set
        config = {}
        with scenario_file.open() as file:
            import ast
            for line in file:
                key, value = line.strip().split(" = ")
                # Scenario keys use dashes, constructor arguments underscores.
                key = key.replace("-", "_")
                try:
                    config[key] = ast.literal_eval(value)
                except Exception:
                    config[key] = value

        # "algo" holds the target-algorithm call; the solver path and the
        # objective are its second and fourth tokens.
        _, solver_path, _, objective_str = config["algo"].split(" ")
        objective = resolve_objective(objective_str)
        solver = Solver(Path(solver_path.strip()))
        # Extract the instance set from the instance file
        instance_file_path = Path(config["instance_file"])
        instance_set_path = Path(instance_file_path.open().readline().strip()).parent
        instance_set = Instance_Set(Path(instance_set_path))

        # Remove keys that are not constructor arguments. These are assumed
        # to always be written by create_scenario_file — verify if the file
        # may come from elsewhere.
        del config["algo"]
        del config["run_obj"]
        del config["deterministic"]
        del config["paramfile"]
        del config["instance_file"]
        del config["test_instance_file"]
        del config["outdir"]
        del config["validation"]
        del config["check_instances_exist"]

        # Map remaining scenario-file spellings to constructor arguments.
        if "cutoffTime" in config:
            config["cutoff_time"] = config.pop("cutoffTime")
        if "approach" in config:
            # Fix: create_scenario_file writes "FOCUSED"/"BASIC"; the old
            # comparison against "FOCUS" never matched, so round-tripped
            # scenarios silently lost the focused ILS setting.
            config["focused_ils"] = config.pop("approach") == "FOCUSED"
        if "R" in config:
            config["initial_configurations"] = config.pop("R")
        if "runcount_limit" in config:
            # Keys were dash->underscore normalised above, so only this
            # spelling can occur (the dashed variant was unreachable).
            config["solver_calls"] = config.pop("runcount_limit")
        # Recover the number of runs from the results directory contents.
        results_folder = scenario_file.parent / "results"
        number_of_runs = len([p for p in results_folder.iterdir() if p.is_file()])
        return ParamILSScenario(solver,
                                instance_set,
                                [objective],
                                scenario_file.parent.parent,
                                number_of_runs=number_of_runs,
                                **config
                                )