Coverage for sparkle/configurator/implementations/paramils.py: 68%

139 statements  

« prev     ^ index     » next       coverage.py v7.8.0, created at 2025-04-03 10:42 +0000

1"""Configurator class to use different configurators like SMAC.""" 

2from __future__ import annotations 

3from pathlib import Path 

4import shutil 

5 

6from runrunner import Runner, Run 

7 

8from sparkle.configurator.configurator import Configurator 

9from sparkle.configurator.implementations.smac2 import SMAC2Scenario 

10from sparkle.solver import Solver 

11from sparkle.structures import PerformanceDataFrame, FeatureDataFrame 

12from sparkle.instance import InstanceSet 

13from sparkle.types import SparkleObjective 

14 

15 

class ParamILS(Configurator):
    """Class for the ParamILS (Java) configurator."""
    # Location of the bundled ParamILS component, relative to the package root
    configurator_path = Path(__file__).parent.parent.parent.resolve() /\
        "Components/paramils-v3.0.0"
    configurator_executable = configurator_path / "paramils"
    target_algorithm = "paramils_target_algorithm.py"
    configurator_target = configurator_path / target_algorithm

    version = "3.0.0"
    full_name = "Parameter Iterated Local Search"

    def __init__(self: ParamILS,
                 base_dir: Path,
                 output_path: Path) -> None:
        """Initialise the ParamILS (Java) configurator, V3.0.0.

        Args:
            base_dir: The path where the configurator will be executed in.
            output_path: The path where the output will be placed.
        """
        output_path = output_path / ParamILS.__name__
        output_path.mkdir(parents=True, exist_ok=True)
        # __init__ must not return a value; call the base initialiser plainly
        super().__init__(
            output_path=output_path,
            base_dir=base_dir,
            tmp_path=output_path / "tmp",
            multi_objective_support=False)

    @property
    def name(self: ParamILS) -> str:
        """Returns the name of the configurator."""
        return ParamILS.__name__

    @staticmethod
    def scenario_class() -> ParamILSScenario:
        """Returns the ParamILS scenario class."""
        return ParamILSScenario

    def configure(self: ParamILS,
                  scenario: ParamILSScenario,
                  data_target: PerformanceDataFrame,
                  validate_after: bool = True,
                  sbatch_options: list[str] = None,
                  slurm_prepend: str | list[str] | Path = None,
                  num_parallel_jobs: int = None,
                  base_dir: Path = None,
                  run_on: Runner = Runner.SLURM) -> list[Run]:
        """Start configuration job.

        Args:
            scenario: ConfigurationScenario object
            data_target: PerformanceDataFrame where to store the found configurations
            validate_after: Whether the Validator will be called after the configuration
            sbatch_options: List of slurm batch options to use. Defaults to an
                empty list (None default avoids a shared mutable default).
            slurm_prepend: Slurm script to prepend to the sbatch
            num_parallel_jobs: The maximum number of jobs to run parallel.
            base_dir: The path where the sbatch scripts will be created for Slurm.
            run_on: On which platform to run the jobs. Default: Slurm.

        Returns:
            A RunRunner Run object.

        Raises:
            RuntimeError: If Java cannot be found on the PATH.
        """
        if shutil.which("java") is None:
            raise RuntimeError(
                "ParamILS requires Java 1.8.0_402, but Java is not installed. "
                "Please ensure Java is installed and try again."
            )
        sbatch_options = sbatch_options if sbatch_options is not None else []
        scenario.create_scenario()
        # We set the seed over the last n run ids in the dataframe
        seeds = data_target.run_ids[data_target.num_runs - scenario.number_of_runs:]
        output = [f"{(scenario.results_directory).absolute()}/"
                  f"{scenario.name}_seed_{seed}_paramils.txt"
                  for seed in seeds]
        # NOTE: Could add --rungroup $dirname to change the created directory name
        cmds = [f"python3 {Configurator.configurator_cli_path.absolute()} "
                f"{ParamILS.__name__} {output_file} {data_target.csv_filepath} "
                f"{scenario.scenario_file_path} {seed} "
                f"{ParamILS.configurator_executable.absolute()} "
                f"--scenario-file {scenario.scenario_file_path} "
                f"--seed {seed} "
                for output_file, seed in zip(output, seeds)]
        if num_parallel_jobs is not None:
            # BUGFIX: cap the parallelism at the number of commands. The
            # previous `max(...)` raised the value to len(cmds), which
            # ignored the user-requested maximum.
            num_parallel_jobs = min(num_parallel_jobs, len(cmds))
        return super().configure(
            configuration_commands=cmds,
            data_target=data_target,
            output=output,
            slurm_prepend=slurm_prepend,
            num_parallel_jobs=num_parallel_jobs,
            scenario=scenario,
            validation_ids=seeds if validate_after else None,
            sbatch_options=sbatch_options,
            base_dir=base_dir,
            run_on=run_on,
        )

    @staticmethod
    def organise_output(output_source: Path,
                        output_target: Path = None,
                        scenario: ParamILSScenario = None,
                        run_id: int = None) -> None | dict:
        """Retrieves a configuration from a ParamILS log file.

        Parses the "Differences with initial configuration" section of the
        log and either returns the configuration or writes it into the
        target PerformanceDataFrame.

        Args:
            output_source: The ParamILS log file to parse.
            output_target: PerformanceDataFrame CSV to store the found
                configuration in. When None or non-existent, the parsed
                configuration is returned instead.
            scenario: The scenario this run belongs to.
            run_id: The run (seed) index for the configuration.

        Returns:
            The parsed configuration dict when no valid target is given,
            otherwise None.
        """
        from filelock import FileLock
        # Extract from log file: the relevant section starts after the
        # marker line and ends at the first line that no longer matches
        # the "<param>: <old> -> <new>" shape.
        configuration = {}
        skipping = True
        with output_source.open() as log_file:  # ensure the handle is closed
            for line in log_file:
                if skipping:
                    if "[INFO ] Differences with initial configuration:" in line:
                        skipping = False
                    continue
                if ":" not in line or "->" not in line:
                    break
                variable = line.split(":")[0].strip()
                value = line.split("->")[1].strip()
                configuration[variable] = value
        if output_target is None or not output_target.exists():
            return configuration
        # Tag the configuration with a unique id based on the scenario
        # file's modification time and the run index
        time_stamp = scenario.scenario_file_path.stat().st_mtime
        configuration["configuration_id"] =\
            f"{ParamILS.__name__}_{time_stamp}_{run_id}"
        instance_names = scenario.instance_set.instance_names
        lock = FileLock(f"{output_target}.lock")  # guard concurrent writers
        with lock.acquire(timeout=60):
            performance_data = PerformanceDataFrame(output_target)
            # Resolve absolute path to Solver column
            solver = [s for s in performance_data.solvers
                      if Path(s).name == scenario.solver.name][0]
            # For some reason the instance paths in the instance set are absolute
            instances = [instance for instance in performance_data.instances
                         if Path(instance).name in instance_names]
            # We don't set the seed in the dataframe, as that should be part of the conf
            performance_data.set_value(
                value=[str(configuration)],
                solver=solver,
                instance=instances,
                objective=None,
                run=run_id,
                solver_fields=[PerformanceDataFrame.column_configuration]
            )
            performance_data.save_csv()

    def get_status_from_logs(self: ParamILS) -> None:
        """Method to scan the log files of the configurator for warnings."""
        return

161 

162 

class ParamILSScenario(SMAC2Scenario):
    """Class to handle ParamILS configuration scenarios."""

    def __init__(self: ParamILSScenario,
                 solver: Solver,
                 instance_set: InstanceSet,
                 sparkle_objectives: list[SparkleObjective],
                 parent_directory: Path,
                 number_of_runs: int = None,
                 solver_calls: int = None,
                 max_iterations: int = None,
                 cutoff_time: int = None,
                 cli_cores: int = None,
                 use_cpu_time_in_tunertime: bool = None,
                 feature_data: FeatureDataFrame | Path = None,
                 tuner_timeout: int = None,
                 focused_ils: bool = True,
                 initial_configurations: int = None,
                 min_runs: int = None,
                 max_runs: int = None,
                 random_restart: float = None,
                 )\
            -> None:
        """Initialize scenario paths and names.

        Args:
            solver: Solver that should be configured.
            instance_set: Instances object for the scenario.
            sparkle_objectives: SparkleObjectives used for each run of the configuration.
            parent_directory: Directory in which the scenario should be created.
            number_of_runs: The number of configurator runs to perform
                for configuring the solver.
            solver_calls: The number of times the solver is called for each
                configuration run
            max_iterations: The maximum number of iterations allowed for each
                configuration run. [iteration-limit, numIterations, numberOfIterations]
            cutoff_time: The maximum number of seconds allowed for each
                configuration run. [time-limit, cpu-time, wallclock-time]
            cli_cores: The maximum number of cores allowed for each
                configuration run.
            use_cpu_time_in_tunertime: Whether to use cpu_time in the tuner
                time limit.
            feature_data: The feature data for the instances in the scenario.
            tuner_timeout: The maximum number of seconds allowed for the tuner.
            focused_ils: Comparison approach of ParamILS.
                True for focused ILS, false for basic.
            initial_configurations: The number of initial configurations.
            min_runs: The minimum number of runs required for a single configuration.
            max_runs: The maximum number of runs allowed for a single configuration.
            random_restart: The probability to restart from a random configuration.
        """
        super().__init__(solver, instance_set, sparkle_objectives, parent_directory,
                         number_of_runs, solver_calls, max_iterations, None,
                         None, cutoff_time, None, cli_cores,
                         use_cpu_time_in_tunertime, feature_data)
        self.solver = solver
        self.instance_set = instance_set
        self.tuner_timeout = tuner_timeout
        self.multi_objective = len(sparkle_objectives) > 1  # Not using MO yet in Sparkle
        self.focused = focused_ils
        self.initial_configurations = initial_configurations
        self.min_runs = min_runs
        self.max_runs = max_runs
        self.random_restart = random_restart

    def create_scenario_file(self: ParamILSScenario) -> Path:
        """Create a file with the configuration scenario.

        Writes the SMAC2-style scenario file through the parent class and
        appends the ParamILS-specific options to it.

        Returns:
            Path to the created scenario file.
        """
        from sparkle.tools.parameters import PCSConvention
        scenario_file = super().create_scenario_file(ParamILS.configurator_target,
                                                     PCSConvention.ParamILS)
        with scenario_file.open("a") as fout:  # append ParamILS-only settings
            fout.write("check-instances-exist = True\n")
            if self.focused is not None:
                approach = "FOCUSED" if self.focused else "BASIC"
                fout.write(f"approach = {approach}\n")
            if self.initial_configurations:
                fout.write(f"R = {self.initial_configurations}\n")
            if self.min_runs:
                fout.write(f"min-runs = {self.min_runs}\n")
            if self.max_runs:
                fout.write(f"max-runs = {self.max_runs}\n")
            if self.random_restart:
                fout.write(f"random-restart = {self.random_restart}\n")
            if self.tuner_timeout:
                fout.write(f"tuner-timeout = {self.tuner_timeout}\n")
        return scenario_file

    @staticmethod
    def from_file(scenario_file: Path) -> ParamILSScenario:
        """Reads scenario file and initialises a ParamILSScenario.

        Args:
            scenario_file: Path of the scenario file to parse.

        Returns:
            The reconstructed ParamILSScenario.
        """
        from sparkle.types import resolve_objective
        from sparkle.instance import Instance_Set
        import ast
        config = {}
        with scenario_file.open() as file:
            for line in file:
                if " = " not in line:  # skip blank or malformed lines
                    continue
                # maxsplit=1 keeps values that themselves contain " = " intact
                key, value = line.strip().split(" = ", 1)
                key = key.replace("-", "_")
                try:
                    config[key] = ast.literal_eval(value)
                except Exception:
                    config[key] = value  # keep raw string when not a literal

        _, solver_path, _, objective_str = config["algo"].split(" ")
        objective = resolve_objective(objective_str)
        solver = Solver(Path(solver_path.strip()))
        # Extract the instance set from the instance file
        instance_file_path = Path(config["instance_file"])
        with instance_file_path.open() as instance_file:  # close handle after read
            instance_set_path = Path(instance_file.readline().strip()).parent
        instance_set = Instance_Set(Path(instance_set_path))

        # Drop keys that are recomputed on construction; pop with a default
        # so a partially written scenario file does not raise KeyError.
        for consumed in ("algo", "run_obj", "deterministic", "paramfile",
                         "instance_file", "test_instance_file", "outdir",
                         "validation", "check_instances_exist"):
            config.pop(consumed, None)

        # Map scenario-file keys back to constructor argument names.
        # NOTE: keys were normalised with replace("-", "_") above, so only
        # the underscore variants can occur here.
        if "cutoffTime" in config:
            config["cutoff_time"] = config.pop("cutoffTime")
        if "approach" in config:
            # BUGFIX: create_scenario_file writes "FOCUSED"/"BASIC"; the
            # previous comparison against "FOCUS" was always False.
            config["focused_ils"] = config.pop("approach") == "FOCUSED"
        if "R" in config:
            config["initial_configurations"] = config.pop("R")
        if "runcount_limit" in config:
            config["solver_calls"] = config.pop("runcount_limit")
        results_folder = scenario_file.parent / "results"
        number_of_runs = len([p for p in results_folder.iterdir() if p.is_file()])
        return ParamILSScenario(solver,
                                instance_set,
                                [objective],
                                scenario_file.parent.parent,
                                number_of_runs=number_of_runs,
                                **config
                                )