Coverage for sparkle/configurator/implementations/paramils.py: 68%

139 statements  

« prev     ^ index     » next       coverage.py v7.8.0, created at 2025-04-03 10:42 +0000

1"""Configurator class to use different configurators like SMAC.""" 

2from __future__ import annotations 

3from pathlib import Path 

4import shutil 

5 

6from runrunner import Runner, Run 

7 

8from sparkle.configurator.configurator import Configurator 

9from sparkle.configurator.implementations.smac2 import SMAC2Scenario 

10from sparkle.solver import Solver 

11from sparkle.structures import PerformanceDataFrame, FeatureDataFrame 

12from sparkle.instance import InstanceSet 

13from sparkle.types import SparkleObjective 

14 

15 

class ParamILS(Configurator):
    """Class for the ParamILS (Java) configurator."""
    # Location of the bundled ParamILS component, relative to the package root
    configurator_path = Path(__file__).parent.parent.parent.resolve() /\
        "Components/paramils-v3.0.0"
    configurator_executable = configurator_path / "paramils"
    target_algorithm = "paramils_target_algorithm.py"
    configurator_target = configurator_path / target_algorithm

    version = "3.0.0"
    full_name = "Parameter Iterated Local Search"

    def __init__(self: ParamILS,
                 base_dir: Path,
                 output_path: Path) -> None:
        """Initialise the ParamILS (Java) configurator, V3.0.0.

        Args:
            base_dir: The path where the configurator will be executed in.
            output_path: The path where the output will be placed.
        """
        output_path = output_path / ParamILS.__name__
        output_path.mkdir(parents=True, exist_ok=True)
        # __init__ must not return a value; call the base initialiser plainly
        super().__init__(
            output_path=output_path,
            base_dir=base_dir,
            tmp_path=output_path / "tmp",
            multi_objective_support=False)

    @property
    def name(self: ParamILS) -> str:
        """Returns the name of the configurator."""
        return ParamILS.__name__

    @staticmethod
    def scenario_class() -> ParamILSScenario:
        """Returns the ParamILS scenario class."""
        return ParamILSScenario

    def configure(self: ParamILS,
                  scenario: ParamILSScenario,
                  data_target: PerformanceDataFrame,
                  validate_after: bool = True,
                  sbatch_options: list[str] = None,
                  slurm_prepend: str | list[str] | Path = None,
                  num_parallel_jobs: int = None,
                  base_dir: Path = None,
                  run_on: Runner = Runner.SLURM) -> list[Run]:
        """Start configuration job.

        Args:
            scenario: ConfigurationScenario object
            data_target: PerformanceDataFrame where to store the found configurations
            validate_after: Whether the Validator will be called after the configuration
            sbatch_options: List of slurm batch options to use. Defaults to an
                empty list (None default avoids a shared mutable default).
            slurm_prepend: Slurm script to prepend to the sbatch
            num_parallel_jobs: The maximum number of jobs to run parallel.
            base_dir: The path where the sbatch scripts will be created for Slurm.
            run_on: On which platform to run the jobs. Default: Slurm.

        Returns:
            A RunRunner Run object.

        Raises:
            RuntimeError: If Java cannot be found on the PATH.
        """
        if shutil.which("java") is None:
            raise RuntimeError(
                "ParamILS requires Java 1.8.0_402, but Java is not installed. "
                "Please ensure Java is installed and try again."
            )
        sbatch_options = sbatch_options if sbatch_options is not None else []
        scenario.create_scenario()
        # We set the seed over the last n run ids in the dataframe
        seeds = data_target.run_ids[data_target.num_runs - scenario.number_of_runs:]
        output = [f"{(scenario.results_directory).absolute()}/"
                  f"{scenario.name}_seed_{seed}_paramils.txt"
                  for seed in seeds]
        # NOTE: Could add --rungroup $dirname to change the created directory name
        cmds = [f"python3 {Configurator.configurator_cli_path.absolute()} "
                f"{ParamILS.__name__} {output_file} {data_target.csv_filepath} "
                f"{scenario.scenario_file_path} {seed} "
                f"{ParamILS.configurator_executable.absolute()} "
                f"--scenario-file {scenario.scenario_file_path} "
                f"--seed {seed} "
                for output_file, seed in zip(output, seeds)]
        if num_parallel_jobs is not None:
            # BUGFIX: cap the parallelism at the number of commands. The
            # previous `max(...)` raised the value to len(cmds), which
            # ignored the user-requested maximum.
            num_parallel_jobs = min(num_parallel_jobs, len(cmds))
        return super().configure(
            configuration_commands=cmds,
            data_target=data_target,
            output=output,
            slurm_prepend=slurm_prepend,
            num_parallel_jobs=num_parallel_jobs,
            scenario=scenario,
            validation_ids=seeds if validate_after else None,
            sbatch_options=sbatch_options,
            base_dir=base_dir,
            run_on=run_on,
        )

    @staticmethod
    def organise_output(output_source: Path,
                        output_target: Path = None,
                        scenario: ParamILSScenario = None,
                        run_id: int = None) -> None | dict:
        """Retrieves a configuration from a ParamILS log file.

        Parses the "Differences with initial configuration" section of the
        log and either returns the configuration or writes it into the
        target PerformanceDataFrame.

        Args:
            output_source: The ParamILS log file to parse.
            output_target: PerformanceDataFrame CSV to store the found
                configuration in. When None or non-existent, the parsed
                configuration is returned instead.
            scenario: The scenario this run belongs to.
            run_id: The run (seed) index for the configuration.

        Returns:
            The parsed configuration dict when no valid target is given,
            otherwise None.
        """
        from filelock import FileLock
        # Extract from log file: the relevant section starts after the
        # marker line and ends at the first line that no longer matches
        # the "<param>: <old> -> <new>" shape.
        configuration = {}
        skipping = True
        with output_source.open() as log_file:  # ensure the handle is closed
            for line in log_file:
                if skipping:
                    if "[INFO ] Differences with initial configuration:" in line:
                        skipping = False
                    continue
                if ":" not in line or "->" not in line:
                    break
                variable = line.split(":")[0].strip()
                value = line.split("->")[1].strip()
                configuration[variable] = value
        if output_target is None or not output_target.exists():
            return configuration
        # Tag the configuration with a unique id based on the scenario
        # file's modification time and the run index
        time_stamp = scenario.scenario_file_path.stat().st_mtime
        configuration["configuration_id"] =\
            f"{ParamILS.__name__}_{time_stamp}_{run_id}"
        instance_names = scenario.instance_set.instance_names
        lock = FileLock(f"{output_target}.lock")  # guard concurrent writers
        with lock.acquire(timeout=60):
            performance_data = PerformanceDataFrame(output_target)
            # Resolve absolute path to Solver column
            solver = [s for s in performance_data.solvers
                      if Path(s).name == scenario.solver.name][0]
            # For some reason the instance paths in the instance set are absolute
            instances = [instance for instance in performance_data.instances
                         if Path(instance).name in instance_names]
            # We don't set the seed in the dataframe, as that should be part of the conf
            performance_data.set_value(
                value=[str(configuration)],
                solver=solver,
                instance=instances,
                objective=None,
                run=run_id,
                solver_fields=[PerformanceDataFrame.column_configuration]
            )
            performance_data.save_csv()

    def get_status_from_logs(self: ParamILS) -> None:
        """Method to scan the log files of the configurator for warnings."""
        return

161 

162 

class ParamILSScenario(SMAC2Scenario):
    """Class to handle ParamILS configuration scenarios."""

    def __init__(self: ParamILSScenario,
                 solver: Solver,
                 instance_set: InstanceSet,
                 sparkle_objectives: list[SparkleObjective],
                 parent_directory: Path,
                 number_of_runs: int = None,
                 solver_calls: int = None,
                 max_iterations: int = None,
                 cutoff_time: int = None,
                 cli_cores: int = None,
                 use_cpu_time_in_tunertime: bool = None,
                 feature_data: FeatureDataFrame | Path = None,
                 tuner_timeout: int = None,
                 focused_ils: bool = True,
                 initial_configurations: int = None,
                 min_runs: int = None,
                 max_runs: int = None,
                 random_restart: float = None,
                 )\
            -> None:
        """Initialize scenario paths and names.

        Args:
            solver: Solver that should be configured.
            instance_set: Instances object for the scenario.
            sparkle_objectives: SparkleObjectives used for each run of the configuration.
            parent_directory: Directory in which the scenario should be created.
            number_of_runs: The number of configurator runs to perform
                for configuring the solver.
            solver_calls: The number of times the solver is called for each
                configuration run
            max_iterations: The maximum number of iterations allowed for each
                configuration run. [iteration-limit, numIterations, numberOfIterations]
            cutoff_time: The maximum number of seconds allowed for each
                configuration run. [time-limit, cpu-time, wallclock-time]
            cli_cores: The maximum number of cores allowed for each
                configuration run.
            use_cpu_time_in_tunertime: Whether to use cpu_time in the tuner
                time limit.
            feature_data: The feature data for the instances in the scenario.
            tuner_timeout: The maximum number of seconds allowed for the tuner.
            focused_ils: Comparison approach of ParamILS.
                True for focused ILS, false for basic.
            initial_configurations: The number of initial configurations.
            min_runs: The minimum number of runs required for a single configuration.
            max_runs: The maximum number of runs allowed for a single configuration.
            random_restart: The probability to restart from a random configuration.
        """
        super().__init__(solver, instance_set, sparkle_objectives, parent_directory,
                         number_of_runs, solver_calls, max_iterations, None,
                         None, cutoff_time, None, cli_cores,
                         use_cpu_time_in_tunertime, feature_data)
        self.solver = solver
        self.instance_set = instance_set
        self.tuner_timeout = tuner_timeout
        self.multi_objective = len(sparkle_objectives) > 1  # Not using MO yet in Sparkle
        self.focused = focused_ils
        self.initial_configurations = initial_configurations
        self.min_runs = min_runs
        self.max_runs = max_runs
        self.random_restart = random_restart

    def create_scenario_file(self: ParamILSScenario) -> Path:
        """Create a file with the configuration scenario.

        Writes the SMAC2-style scenario file through the parent class and
        appends the ParamILS-specific options to it.

        Returns:
            Path to the created scenario file.
        """
        from sparkle.tools.parameters import PCSConvention
        scenario_file = super().create_scenario_file(ParamILS.configurator_target,
                                                     PCSConvention.ParamILS)
        with scenario_file.open("a") as fout:  # append ParamILS-only settings
            fout.write("check-instances-exist = True\n")
            if self.focused is not None:
                approach = "FOCUSED" if self.focused else "BASIC"
                fout.write(f"approach = {approach}\n")
            if self.initial_configurations:
                fout.write(f"R = {self.initial_configurations}\n")
            if self.min_runs:
                fout.write(f"min-runs = {self.min_runs}\n")
            if self.max_runs:
                fout.write(f"max-runs = {self.max_runs}\n")
            if self.random_restart:
                fout.write(f"random-restart = {self.random_restart}\n")
            if self.tuner_timeout:
                fout.write(f"tuner-timeout = {self.tuner_timeout}\n")
        return scenario_file

    @staticmethod
    def from_file(scenario_file: Path) -> ParamILSScenario:
        """Reads scenario file and initialises a ParamILSScenario.

        Args:
            scenario_file: Path of the scenario file to parse.

        Returns:
            The reconstructed ParamILSScenario.
        """
        from sparkle.types import resolve_objective
        from sparkle.instance import Instance_Set
        import ast
        config = {}
        with scenario_file.open() as file:
            for line in file:
                if " = " not in line:  # skip blank or malformed lines
                    continue
                # maxsplit=1 keeps values that themselves contain " = " intact
                key, value = line.strip().split(" = ", 1)
                key = key.replace("-", "_")
                try:
                    config[key] = ast.literal_eval(value)
                except Exception:
                    config[key] = value  # keep raw string when not a literal

        _, solver_path, _, objective_str = config["algo"].split(" ")
        objective = resolve_objective(objective_str)
        solver = Solver(Path(solver_path.strip()))
        # Extract the instance set from the instance file
        instance_file_path = Path(config["instance_file"])
        with instance_file_path.open() as instance_file:  # close handle after read
            instance_set_path = Path(instance_file.readline().strip()).parent
        instance_set = Instance_Set(Path(instance_set_path))

        # Drop keys that are recomputed on construction; pop with a default
        # so a partially written scenario file does not raise KeyError.
        for consumed in ("algo", "run_obj", "deterministic", "paramfile",
                         "instance_file", "test_instance_file", "outdir",
                         "validation", "check_instances_exist"):
            config.pop(consumed, None)

        # Map scenario-file keys back to constructor argument names.
        # NOTE: keys were normalised with replace("-", "_") above, so only
        # the underscore variants can occur here.
        if "cutoffTime" in config:
            config["cutoff_time"] = config.pop("cutoffTime")
        if "approach" in config:
            # BUGFIX: create_scenario_file writes "FOCUSED"/"BASIC"; the
            # previous comparison against "FOCUS" was always False.
            config["focused_ils"] = config.pop("approach") == "FOCUSED"
        if "R" in config:
            config["initial_configurations"] = config.pop("R")
        if "runcount_limit" in config:
            config["solver_calls"] = config.pop("runcount_limit")
        results_folder = scenario_file.parent / "results"
        number_of_runs = len([p for p in results_folder.iterdir() if p.is_file()])
        return ParamILSScenario(solver,
                                instance_set,
                                [objective],
                                scenario_file.parent.parent,
                                number_of_runs=number_of_runs,
                                **config
                                )