Coverage for sparkle/CLI/run_solvers.py: 88%

165 statements  

coverage.py v7.9.1, created at 2025-07-01 13:21 +0000

#!/usr/bin/env python3
"""Sparkle command to run solvers to get their performance data."""
from __future__ import annotations
import random
import sys
import argparse
from pathlib import PurePath, Path

from runrunner.base import Runner, Run

from sparkle.solver import Solver
from sparkle.instance import Instance_Set
from sparkle.structures import PerformanceDataFrame
from sparkle.types import SparkleObjective, resolve_objective
from sparkle.instance import InstanceSet
from sparkle.platform.settings_objects import Settings, SettingState
from sparkle.CLI.help import global_variables as gv
from sparkle.CLI.help import logging as sl
from sparkle.CLI.help import argparse_custom as ac
from sparkle.CLI.help.nicknames import resolve_object_name, resolve_instance_name
from sparkle.CLI.initialise import check_for_initialise


def parser_function() -> argparse.ArgumentParser:
    """Define the command line arguments."""
    parser = argparse.ArgumentParser(
        description="Run solvers on instances to get their performance data.")
    parser.add_argument(*ac.SolversArgument.names,
                        **ac.SolversArgument.kwargs)
    parser.add_argument(*ac.InstanceSetPathsArgument.names,
                        **ac.InstanceSetPathsArgument.kwargs)

    # Mutually exclusive: a specific configuration, the best configuration,
    # or all configurations
    configuration_group = parser.add_mutually_exclusive_group()
    configuration_group.add_argument(*ac.ConfigurationArgument.names,
                                     **ac.ConfigurationArgument.kwargs)
    configuration_group.add_argument(*ac.BestConfigurationArgument.names,
                                     **ac.BestConfigurationArgument.kwargs)
    configuration_group.add_argument(*ac.AllConfigurationArgument.names,
                                     **ac.AllConfigurationArgument.kwargs)

    parser.add_argument(*ac.ObjectiveArgument.names,
                        **ac.ObjectiveArgument.kwargs)
    parser.add_argument(*ac.PerformanceDataJobsArgument.names,
                        **ac.PerformanceDataJobsArgument.kwargs)
    # Only relevant if the performance data jobs argument above is given
    parser.add_argument(*ac.RecomputeRunSolversArgument.names,
                        **ac.RecomputeRunSolversArgument.kwargs)
    parser.add_argument(*ac.SolverCutOffTimeArgument.names,
                        **ac.SolverCutOffTimeArgument.kwargs)
    parser.add_argument(*ac.RunOnArgument.names,
                        **ac.RunOnArgument.kwargs)
    parser.add_argument(*ac.SettingsFileArgument.names,
                        **ac.SettingsFileArgument.kwargs)
    return parser

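Each ac.*Argument object referenced above bundles the positional names and keyword arguments for argparse's add_argument, so the parser is assembled purely by unpacking. Below is a minimal, self-contained sketch of that pattern; the _ExampleArgument class and its --run-on flag are made up for illustration and are not Sparkle's real argument definitions.

import argparse


class _ExampleArgument:
    """Hypothetical stand-in for an entry of sparkle.CLI.help.argparse_custom."""
    names = ["--run-on"]
    kwargs = {"type": str, "default": "slurm", "help": "Where to execute the solvers."}


example_parser = argparse.ArgumentParser()
example_parser.add_argument(*_ExampleArgument.names, **_ExampleArgument.kwargs)
print(example_parser.parse_args(["--run-on", "local"]))  # Namespace(run_on='local')
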

def run_solvers(
        solvers: list[Solver],
        instances: list[str] | list[InstanceSet],
        objectives: list[SparkleObjective],
        seed: int,
        cutoff_time: int,
        configurations: list[list[dict[str, str]]],
        sbatch_options: list[str] = None,
        slurm_prepend: str | list[str] | Path = None,
        log_dir: Path = None,
        run_on: Runner = Runner.SLURM) -> list[Run]:
    """Run the solvers.

    Parameters
    ----------
    solvers: list[Solver]
        The solvers to run
    instances: list[str] | list[InstanceSet]
        The instances to run the solvers on
    objectives: list[SparkleObjective]
        The objective values to retrieve from the solvers
    seed: int
        The seed to use
    cutoff_time: int
        The cutoff time for the solvers
    configurations: list[list[dict[str, str]]]
        The configurations to use for each solver
    sbatch_options: list[str]
        The sbatch options to use for the solvers
    slurm_prepend: str | list[str] | Path
        The script to prepend to a Slurm script
    log_dir: Path
        The directory to use for the logs
    run_on: Runner
        Where to execute the solvers.

    Returns
    -------
    runs: list[Run]
        The runrunner Run objects for the started solver runs. For local runs the
        results are printed directly and nothing is appended to the list.
    """
    runs = []
    # Run the solvers
    for solver, solver_confs in zip(solvers, configurations):
        for conf_index, conf in enumerate(solver_confs):
            if "configuration_id" in conf:
                conf_name = conf["configuration_id"]
            else:
                conf_name = conf_index
            run = solver.run(instances=instances,
                             objectives=objectives,
                             seed=seed,
                             configuration=conf,
                             cutoff_time=cutoff_time,
                             run_on=run_on,
                             sbatch_options=sbatch_options,
                             slurm_prepend=slurm_prepend,
                             log_dir=log_dir)
            if run_on == Runner.LOCAL:
                if isinstance(run, dict):
                    run = [run]
                # TODO: Refactor resolving objective keys
                status_key = [key for key in run[0]
                              if key.lower().startswith("status")][0]
                time_key = [key for key in run[0]
                            if key.lower().startswith("cpu_time")][0]
                for i, solver_output in enumerate(run):
                    print(f"Execution of {solver.name} ({conf_name}) on instance "
                          f"{instances[i]} completed with status "
                          f"{solver_output[status_key]} in {solver_output[time_key]} "
                          f"seconds.")
                print("Running configured solver done!")
            else:
                runs.append(run)
    return runs

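A hedged sketch of calling run_solvers directly for a local run. The solver directory, instance path, objective name, and the empty dict standing in for a default configuration are illustrative assumptions, not values taken from this module.

from pathlib import Path

from runrunner.base import Runner
from sparkle.solver import Solver
from sparkle.types import resolve_objective

example_solver = Solver(Path("Solvers/ExampleSolver"))  # hypothetical solver directory
run_solvers(solvers=[example_solver],
            instances=["Instances/Example/instance_1.cnf"],  # hypothetical instance
            objectives=[resolve_objective("PAR10")],  # assumed objective name
            seed=42,
            cutoff_time=60,
            configurations=[[{}]],  # one (empty) default configuration per solver
            run_on=Runner.LOCAL)
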

def run_solvers_performance_data(
        performance_data: PerformanceDataFrame,
        cutoff_time: int,
        rerun: bool = False,
        solvers: list[Solver] = None,
        instances: list[str] = None,
        sbatch_options: list[str] = None,
        slurm_prepend: str | list[str] | Path = None,
        run_on: Runner = Runner.SLURM) -> list[Run]:
    """Run the solvers for the performance data.

    Parameters
    ----------
    performance_data: PerformanceDataFrame
        The performance data
    cutoff_time: int
        The cutoff time for the solvers
    rerun: bool
        Run only solvers for which no data is available yet (False) or (re)run all
        solvers to get (new) performance data for them (True)
    solvers: list[Solver]
        The solvers to run. If None, run all found solvers.
    instances: list[str]
        The instances to run the solvers on. If None, run all found instances.
    sbatch_options: list[str]
        The sbatch options to use
    slurm_prepend: str | list[str] | Path
        The script to prepend to a Slurm script
    run_on: Runner
        Where to execute the solvers. For available values see the
        runrunner.base.Runner enum. Default: Runner.SLURM.

    Returns
    -------
    runs: list[Run]
        The runrunner Run objects, one per solver configuration with jobs,
        or None if there were no jobs to run.
    """
    # List of jobs to do
    jobs = performance_data.get_job_list(rerun=rerun)

    # Edit jobs to incorporate file paths
    jobs_with_paths = []
    for solver, config, instance, run in jobs:
        instance_path = resolve_instance_name(
            instance, gv.settings().DEFAULT_instance_dir)
        jobs_with_paths.append((solver, config, instance_path, run))
    jobs = jobs_with_paths

    print(f"Total number of jobs to run: {len(jobs)}")
    # If there are no jobs, stop
    if len(jobs) == 0:
        return None

    if run_on == Runner.LOCAL:
        print("Running the solvers locally")
    elif run_on == Runner.SLURM:
        print("Running the solvers through Slurm")

    if solvers is None:
        solver_keys = performance_data.solvers
        solvers = [Solver(Path(s)) for s in solver_keys]
    else:  # Filter the jobs to the given solvers
        solver_keys = [str(s.directory) for s in solvers]
        jobs = [j for j in jobs if j[0] in solver_keys]
    # Filter the instances
    if instances is not None:
        jobs = [j for j in jobs if j[2] in instances]
    # Group the jobs per solver, configuration and instance
    solver_jobs = {p_solver: {} for p_solver, _, _, _ in jobs}
    for p_solver, p_config, p_instance, p_run in jobs:
        if p_config not in solver_jobs[p_solver]:
            solver_jobs[p_solver][p_config] = {}
        if p_instance not in solver_jobs[p_solver][p_config]:
            solver_jobs[p_solver][p_config][p_instance] = [p_run]
        else:
            solver_jobs[p_solver][p_config][p_instance].append(p_run)
    runrunner_runs = []
    if run_on == Runner.LOCAL:
        print(f"Cutoff time for each solver run: {cutoff_time} seconds")
    for solver, solver_key in zip(solvers, solver_keys):
        for solver_config in solver_jobs[solver_key].keys():
            solver_instances = solver_jobs[solver_key][solver_config].keys()
            if not solver_instances:
                print(f"Warning: No jobs for instances found for solver {solver_key}")
                continue
            run_ids = [solver_jobs[solver_key][solver_config][instance]
                       for instance in solver_instances]
            run = solver.run_performance_dataframe(
                solver_instances, solver_config, performance_data,
                run_ids=run_ids, cutoff_time=cutoff_time,
                sbatch_options=sbatch_options, slurm_prepend=slurm_prepend,
                log_dir=sl.caller_log_dir, base_dir=sl.caller_log_dir, run_on=run_on)
            runrunner_runs.append(run)
            if run_on == Runner.LOCAL:
                # TODO: Print per-run results for local runs?
                pass
    if run_on == Runner.SLURM:
        num_jobs = sum(len(r.jobs) for r in runrunner_runs)
        print(f"Total number of jobs submitted: {num_jobs}")

    return runrunner_runs

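The grouping step in run_solvers_performance_data collapses the flat job list into a solver → configuration → instance → run-ids mapping before anything is submitted. The snippet below is a small, self-contained illustration of that shape using made-up job tuples; it is equivalent to the explicit if/else grouping above but uses dict.setdefault.

jobs = [("Solvers/A", "config_1", "Instances/i1", 1),
        ("Solvers/A", "config_1", "Instances/i1", 2),
        ("Solvers/A", "config_2", "Instances/i2", 1)]
solver_jobs = {p_solver: {} for p_solver, _, _, _ in jobs}
for p_solver, p_config, p_instance, p_run in jobs:
    solver_jobs[p_solver].setdefault(p_config, {}).setdefault(p_instance, []).append(p_run)
print(solver_jobs)
# {'Solvers/A': {'config_1': {'Instances/i1': [1, 2]}, 'config_2': {'Instances/i2': [1]}}}
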

def main(argv: list[str]) -> None:
    """Main function of the run solvers command."""
    # Log command call
    sl.log_command(sys.argv)
    check_for_initialise()

    # Define command line arguments
    parser = parser_function()

    # Process command line arguments
    args = parser.parse_args(argv)
    if args.settings_file is not None:
        # Do this first, so other command line options can override the file settings
        gv.settings().read_settings_ini(args.settings_file, SettingState.CMD_LINE)
    if args.solver_cutoff_time is not None:
        gv.settings().set_general_solver_cutoff_time(
            args.solver_cutoff_time, SettingState.CMD_LINE)
    if args.run_on is not None:
        gv.settings().set_run_on(
            args.run_on.value, SettingState.CMD_LINE)
    if args.best_configuration:
        if not args.objective:
            objective = gv.settings().get_general_sparkle_objectives()[0]
            print("WARNING: Best configuration requested, but no objective specified. "
                  f"Defaulting to first objective: {objective}")
        else:
            objective = resolve_objective(args.objective)

    # Compare current settings to latest.ini
    prev_settings = Settings(PurePath("Settings/latest.ini"))
    Settings.check_settings_changes(gv.settings(), prev_settings)

    if args.solvers:
        solvers = [resolve_object_name(
            solver_path,
            gv.file_storage_data_mapping[gv.solver_nickname_list_path],
            gv.settings().DEFAULT_solver_dir, Solver)
            for solver_path in args.solvers]
    else:
        solvers = [Solver(p) for p in
                   gv.settings().DEFAULT_solver_dir.iterdir() if p.is_dir()]

    if args.instance_path:
        instances = [resolve_object_name(
            instance_path,
            gv.file_storage_data_mapping[gv.instances_nickname_path],
            gv.settings().DEFAULT_instance_dir, Instance_Set)
            for instance_path in args.instance_path]
        # Unpack the sets into instance strings
        instances = [str(path) for instance_set in instances
                     for path in instance_set.instance_paths]
    else:
        instances = None  # TODO: Decide whether defaulting to all instances is desired

    sbatch_options = gv.settings().get_slurm_extra_options(as_args=True)
    slurm_prepend = gv.settings().get_slurm_job_prepend()
    # Write settings to file before starting, since they are used in callback scripts
    gv.settings().write_used_settings()
    run_on = gv.settings().get_run_on()
    cutoff_time = gv.settings().get_general_solver_cutoff_time()
    # Open the performance data csv file
    performance_dataframe = PerformanceDataFrame(
        gv.settings().DEFAULT_performance_data_path)

    print("Start running solvers ...")
    if args.performance_data_jobs:
        runs = run_solvers_performance_data(
            performance_data=performance_dataframe,
            solvers=solvers,
            instances=instances,
            cutoff_time=cutoff_time,
            rerun=args.recompute,
            sbatch_options=sbatch_options,
            slurm_prepend=slurm_prepend,
            run_on=run_on)
    else:
        if args.best_configuration:
            train_instances = None
            if isinstance(args.best_configuration, list):
                train_instances = [resolve_object_name(
                    instance_path,
                    gv.file_storage_data_mapping[gv.instances_nickname_path],
                    gv.settings().DEFAULT_instance_dir, Instance_Set)
                    for instance_path in args.best_configuration]
                # Unpack the sets into instance strings
                instances = [str(path) for instance_set in train_instances
                             for path in instance_set.instance_paths]
            # Determine the best configuration per solver
            configurations = [[performance_dataframe.best_configuration(
                str(solver.directory), objective, train_instances)[0]]
                for solver in solvers]
        elif args.configuration:
            # Match the given configuration ids to their solvers
            # TODO: Add a better check that the id could only match this solver
            configurations = []
            for solver in solvers:
                configurations.append([])
                for c in args.configuration:
                    if c not in performance_dataframe.configuration_ids:
                        raise ValueError(f"Configuration id {c} not found.")
                    if c in performance_dataframe.get_configurations(
                            str(solver.directory)):
                        configurations[-1].append(c)
        elif args.all_configurations:  # All known configurations
            configurations = [performance_dataframe.get_configurations(
                str(solver.directory)) for solver in solvers]
        else:  # Only default configurations
            configurations = \
                [[PerformanceDataFrame.default_configuration] for _ in solvers]
        # Look up and replace the ids with the actual configurations
        for solver_index, configs in enumerate(configurations):
            for config_index, config in enumerate(configs):
                configurations[solver_index][config_index] = \
                    performance_dataframe.get_full_configuration(
                        str(solvers[solver_index].directory), config)
        if instances is None:
            instances = []
            for instance_dir in gv.settings().DEFAULT_instance_dir.iterdir():
                if instance_dir.is_dir():
                    instances.append(Instance_Set(instance_dir))

        # TODO: Objective argument not used in the multi-file instances case?
        runs = run_solvers(
            solvers=solvers,
            configurations=configurations,
            instances=instances,
            objectives=gv.settings().get_general_sparkle_objectives(),
            seed=random.randint(0, sys.maxsize),
            cutoff_time=cutoff_time,
            sbatch_options=sbatch_options,
            slurm_prepend=slurm_prepend,
            log_dir=sl.caller_log_dir,
            run_on=gv.settings().get_run_on(),
        )

    # If there are no jobs, we are done
    if runs is None or all(run is None for run in runs):
        print("Running solvers done!")
    elif run_on == Runner.SLURM:
        print("Running solvers through Slurm with job id(s): "
              f'{",".join(r.run_id for r in runs if r is not None)}')
    sys.exit(0)


if __name__ == "__main__":
    main(sys.argv[1:])
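For reference, a hedged sketch of driving run_solvers_performance_data programmatically instead of through main. It assumes an already initialised Sparkle platform, since the function reads the instance directory and log directory from the platform settings; the CSV path below is a hypothetical placeholder.

from pathlib import Path

from runrunner.base import Runner
from sparkle.structures import PerformanceDataFrame
from sparkle.CLI.run_solvers import run_solvers_performance_data

performance_data = PerformanceDataFrame(
    Path("Performance_Data/performance_data.csv"))  # hypothetical path
runs = run_solvers_performance_data(performance_data=performance_data,
                                    cutoff_time=60,
                                    rerun=False,  # only jobs without data yet
                                    run_on=Runner.SLURM)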