Coverage for sparkle/CLI/run_solvers.py: 88%
165 statements
« prev ^ index » next coverage.py v7.9.1, created at 2025-07-01 13:21 +0000
1#!/usr/bin/env python3
2"""Sparkle command to run solvers to get their performance data."""
3from __future__ import annotations
4import random
5import sys
6import argparse
7from pathlib import PurePath, Path
9from runrunner.base import Runner, Run
11from sparkle.solver import Solver
12from sparkle.instance import Instance_Set
13from sparkle.structures import PerformanceDataFrame
14from sparkle.types import SparkleObjective, resolve_objective
15from sparkle.instance import InstanceSet
16from sparkle.platform.settings_objects import Settings, SettingState
17from sparkle.CLI.help import global_variables as gv
18from sparkle.CLI.help import logging as sl
19from sparkle.CLI.help import argparse_custom as ac
20from sparkle.CLI.help.nicknames import resolve_object_name, resolve_instance_name
21from sparkle.CLI.initialise import check_for_initialise
def parser_function() -> argparse.ArgumentParser:
    """Build the argument parser for the run-solvers command."""
    parser = argparse.ArgumentParser(
        description="Run solvers on instances to get their performance data.")
    for argument in (ac.SolversArgument,
                     ac.InstanceSetPathsArgument):
        parser.add_argument(*argument.names, **argument.kwargs)
    # Mutually exclusive: a specific configuration, the best one, or all of them
    configuration_group = parser.add_mutually_exclusive_group()
    for argument in (ac.ConfigurationArgument,
                     ac.BestConfigurationArgument,
                     ac.AllConfigurationArgument):
        configuration_group.add_argument(*argument.names, **argument.kwargs)
    for argument in (ac.ObjectiveArgument,
                     ac.PerformanceDataJobsArgument,
                     # Recompute is only relevant when performance-data jobs is given
                     ac.RecomputeRunSolversArgument,
                     ac.SolverCutOffTimeArgument,
                     ac.RunOnArgument,
                     ac.SettingsFileArgument):
        parser.add_argument(*argument.names, **argument.kwargs)
    return parser
def run_solvers(
        solvers: list[Solver],
        instances: list[str] | list[InstanceSet],
        objectives: list[SparkleObjective],
        seed: int,
        cutoff_time: int,
        configurations: list[list[dict[str, str]]],
        sbatch_options: list[str] = None,
        slurm_prepend: str | list[str] | Path = None,
        log_dir: Path = None,
        run_on: Runner = Runner.SLURM) -> list[Run]:
    """Run the solvers.

    Parameters
    ----------
    solvers: list[Solver]
        The solvers to run
    instances: list[str] | list[InstanceSet]
        The instances to run the solvers on
    objectives: list[SparkleObjective]
        The objective values to retrieve from the solvers
    seed: int
        The seed to use
    cutoff_time: int
        The cut off time in seconds for each solver run
    configurations: list[list[dict[str, str]]]
        Per solver, the configurations (parameter dicts) to run it with.
        Must be parallel to ``solvers``.
    sbatch_options: list[str]
        The sbatch options to use for the solvers
    slurm_prepend: str | list[str] | Path
        The script to prepend to a slurm script
    log_dir: Path
        The directory to use for the logs
    run_on: Runner
        Where to execute the solvers.

    Returns
    -------
    runs: list[Run]
        The submitted runs (Slurm). Empty when running locally, as local
        runs are executed inline and their results printed directly.
    """
    runs = []
    # Launch each solver once per configuration
    for solver, solver_confs in zip(solvers, configurations):
        for conf_index, conf in enumerate(solver_confs):
            # Prefer the explicit id when present; fall back to positional index
            conf_name = conf.get("configuration_id", conf_index)
            run = solver.run(instances=instances,
                             objectives=objectives,
                             seed=seed,
                             configuration=conf,
                             cutoff_time=cutoff_time,
                             run_on=run_on,
                             sbatch_options=sbatch_options,
                             slurm_prepend=slurm_prepend,
                             log_dir=log_dir)
            if run_on == Runner.LOCAL:
                # Local execution returns solver output dicts (one per instance)
                if isinstance(run, dict):
                    run = [run]
                # TODO: Refactor resolving objective keys
                status_key = [key for key in run[0]
                              if key.lower().startswith("status")][0]
                time_key = [key for key in run[0]
                            if key.lower().startswith("cpu_time")][0]
                for i, solver_output in enumerate(run):
                    print(f"Execution of {solver.name} ({conf_name}) on instance "
                          f"{instances[i]} completed with status "
                          f"{solver_output[status_key]} in {solver_output[time_key]} "
                          f"seconds.")
                print("Running configured solver done!")
            else:
                runs.append(run)
    return runs
def run_solvers_performance_data(
        performance_data: PerformanceDataFrame,
        cutoff_time: int,
        rerun: bool = False,
        solvers: list[Solver] = None,
        instances: list[str] = None,
        sbatch_options: list[str] = None,
        slurm_prepend: str | list[str] | Path = None,
        run_on: Runner = Runner.SLURM) -> list[Run]:
    """Run the solvers for the performance data.

    Parameters
    ----------
    performance_data: PerformanceDataFrame
        The performance data
    cutoff_time: int
        The cut off time for the solvers
    rerun: bool
        Run only solvers for which no data is available yet (False) or (re)run all
        solvers to get (new) performance data for them (True)
    solvers: list[Solver]
        The solvers to run. If None, run all solvers found in the dataframe.
    instances: list[str]
        The instances to run the solvers on. If None, run all found instances.
    sbatch_options: list[str]
        The sbatch options to use
    slurm_prepend: str | list[str] | Path
        The script to prepend to a slurm script
    run_on: Runner
        Where to execute the solvers. For available values see runrunner.base.Runner
        enum. Default: "Runner.SLURM".

    Returns
    -------
    runs: list[Run] or None
        The submitted/executed runs, or None if there were no jobs to do.
    """
    # List of (solver, config, instance, run) jobs still to be computed
    jobs = performance_data.get_job_list(rerun=rerun)

    # Resolve instance names in the jobs to full file paths
    jobs_with_paths = []
    for solver, config, instance, run in jobs:
        instance_path = resolve_instance_name(
            instance, gv.settings().DEFAULT_instance_dir)
        jobs_with_paths.append((solver, config, instance_path, run))
    jobs = jobs_with_paths

    print(f"Total number of jobs to run: {len(jobs)}")
    # If there are no jobs, stop
    if len(jobs) == 0:
        return None

    if run_on == Runner.LOCAL:
        print("Running the solvers locally")
    elif run_on == Runner.SLURM:
        print("Running the solvers through Slurm")

    # BUGFIX: previously `solvers` was first replaced by the dataframe's solver
    # keys (strings) when None, which made the None-check below dead and sent
    # plain strings into the `.directory` branch. Let the if/else handle both.
    if solvers is None:
        # Run every solver known to the dataframe
        solver_keys = performance_data.solvers
        solvers = [Solver(Path(s)) for s in solver_keys]
    else:  # Filter the jobs down to the given Solvers
        solver_keys = [str(s.directory) for s in solvers]
        jobs = [j for j in jobs if j[0] in solver_keys]
    # Filter the instances
    if instances is not None:
        jobs = [j for j in jobs if j[2] in instances]
    # Sort the jobs per solver -> config -> instance -> [run ids]
    solver_jobs = {p_solver: {} for p_solver, _, _, _ in jobs}
    for p_solver, p_config, p_instance, p_run in jobs:
        if p_config not in solver_jobs[p_solver]:
            solver_jobs[p_solver][p_config] = {}
        if p_instance not in solver_jobs[p_solver][p_config]:
            solver_jobs[p_solver][p_config][p_instance] = [p_run]
        else:
            solver_jobs[p_solver][p_config][p_instance].append(p_run)
    runrunner_runs = []
    if run_on == Runner.LOCAL:
        print(f"Cutoff time for each solver run: {cutoff_time} seconds")
    for solver, solver_key in zip(solvers, solver_keys):
        for solver_config in solver_jobs[solver_key].keys():
            solver_instances = solver_jobs[solver_key][solver_config].keys()
            # BUGFIX: the original compared dict_keys to [] (always False);
            # use truthiness so empty instance sets are actually skipped.
            if not solver_instances:
                print(f"Warning: No jobs for instances found for solver {solver_key}")
                continue
            run_ids = [solver_jobs[solver_key][solver_config][instance]
                       for instance in solver_instances]
            run = solver.run_performance_dataframe(
                solver_instances, solver_config, performance_data,
                run_ids=run_ids, cutoff_time=cutoff_time,
                sbatch_options=sbatch_options, slurm_prepend=slurm_prepend,
                log_dir=sl.caller_log_dir, base_dir=sl.caller_log_dir, run_on=run_on)
            runrunner_runs.append(run)
    if run_on == Runner.SLURM:
        num_jobs = sum(len(r.jobs) for r in runrunner_runs)
        print(f"Total number of jobs submitted: {num_jobs}")

    return runrunner_runs
def main(argv: list[str]) -> None:
    """Main function of the run solvers command.

    Parses the command line arguments, applies them to the platform settings,
    resolves solvers/instances/configurations and dispatches the runs either
    as pending performance-data jobs or as direct solver runs.

    Parameters
    ----------
    argv: list[str]
        Command line arguments (without the program name).
    """
    # Log command call
    sl.log_command(sys.argv)
    check_for_initialise()

    # Define command line arguments
    parser = parser_function()

    # Process command line arguments
    args = parser.parse_args(argv)
    if args.settings_file is not None:
        # Do first, so other command line options can override settings from the file
        gv.settings().read_settings_ini(args.settings_file, SettingState.CMD_LINE)
    if args.solver_cutoff_time is not None:
        gv.settings().set_general_solver_cutoff_time(
            args.solver_cutoff_time, SettingState.CMD_LINE)
    if args.run_on is not None:
        gv.settings().set_run_on(
            args.run_on.value, SettingState.CMD_LINE)
    if args.best_configuration:
        # Best configuration needs an objective to rank by; fall back to the
        # first configured objective when none is given explicitly.
        if not args.objective:
            objective = gv.settings().get_general_sparkle_objectives()[0]
            print("WARNING: Best configuration requested, but no objective specified. "
                  f"Defaulting to first objective: {objective}")
        else:
            objective = resolve_objective(args.objective)

    # Compare current settings to latest.ini
    prev_settings = Settings(PurePath("Settings/latest.ini"))
    Settings.check_settings_changes(gv.settings(), prev_settings)

    # Resolve solvers from nicknames/paths, or default to every solver
    # directory on the platform.
    if args.solvers:
        solvers = [resolve_object_name(solver_path,
                   gv.file_storage_data_mapping[gv.solver_nickname_list_path],
                   gv.settings().DEFAULT_solver_dir, Solver)
                   for solver_path in args.solvers]
    else:
        solvers = [Solver(p) for p in
                   gv.settings().DEFAULT_solver_dir.iterdir() if p.is_dir()]

    if args.instance_path:
        instances = [resolve_object_name(instance_path,
                     gv.file_storage_data_mapping[gv.instances_nickname_path],
                     gv.settings().DEFAULT_instance_dir, Instance_Set)
                     for instance_path in args.instance_path]
        # Unpack the sets into instance strings
        # NOTE(review): `set` shadows the builtin here; rename if refactoring.
        instances = [str(path) for set in instances for path in set.instance_paths]
    else:
        instances = None  # TODO: Fix? Or its good like this

    sbatch_options = gv.settings().get_slurm_extra_options(as_args=True)
    slurm_prepend = gv.settings().get_slurm_job_prepend()
    # Write settings to file before starting, since they are used in callback scripts
    gv.settings().write_used_settings()
    run_on = gv.settings().get_run_on()
    cutoff_time = gv.settings().get_general_solver_cutoff_time()
    # Open the performance data csv file
    performance_dataframe = PerformanceDataFrame(
        gv.settings().DEFAULT_performance_data_path)

    print("Start running solvers ...")
    if args.performance_data_jobs:
        # Run only the jobs still pending in the performance dataframe
        runs = run_solvers_performance_data(
            performance_data=performance_dataframe,
            solvers=solvers,
            instances=instances,
            cutoff_time=cutoff_time,
            rerun=args.recompute,
            sbatch_options=sbatch_options,
            slurm_prepend=slurm_prepend,
            run_on=run_on)
    else:
        # Determine which configuration(s) to run for each solver
        if args.best_configuration:
            train_instances = None
            if isinstance(args.best_configuration, list):
                # The flag may carry instance sets to determine "best" on
                train_instances = [resolve_object_name(
                    instance_path,
                    gv.file_storage_data_mapping[gv.instances_nickname_path],
                    gv.settings().DEFAULT_instance_dir, Instance_Set)
                    for instance_path in args.best_configuration]
                # Unpack the sets into instance strings
                instances = [str(path) for set in train_instances
                             for path in set.instance_paths]
            # Determine best configuration per solver for the chosen objective
            configurations = [[performance_dataframe.best_configuration(
                str(solver.directory), objective, train_instances)[0]]
                for solver in solvers]
        elif args.configuration:
            # Sort the configurations to the solvers
            # TODO: Add a better check that the id could only match this solver
            configurations = []
            for solver in solvers:
                configurations.append([])
                for c in args.configuration:
                    if c not in performance_dataframe.configuration_ids:
                        raise ValueError(f"Configuration id {c} not found.")
                    if c in performance_dataframe.get_configurations(
                            str(solver.directory)):
                        configurations[-1].append(c)
        elif args.all_configurations:  # All known configurations
            configurations = [performance_dataframe.get_configurations(
                str(solver.directory)) for solver in solvers]
        else:  # Only default configurations
            configurations =\
                [[PerformanceDataFrame.default_configuration] for _ in solvers]
        # Look up and replace config ids with the actual configuration dicts
        for solver_index, configs in enumerate(configurations):
            for config_index, config in enumerate(configs):
                configurations[solver_index][config_index] = \
                    performance_dataframe.get_full_configuration(
                        str(solvers[solver_index].directory), config)
        if instances is None:
            # No instances given: default to every instance set on the platform
            instances = []
            for instance_dir in gv.settings().DEFAULT_instance_dir.iterdir():
                if instance_dir.is_dir():
                    instances.append(Instance_Set(instance_dir))

        # TODO Objective arg not used in Multi-file-instances case?
        runs = run_solvers(
            solvers=solvers,
            configurations=configurations,
            instances=instances,
            objectives=gv.settings().get_general_sparkle_objectives(),
            seed=random.randint(0, sys.maxsize),
            cutoff_time=cutoff_time,
            sbatch_options=sbatch_options,
            slurm_prepend=slurm_prepend,
            log_dir=sl.caller_log_dir,
            run_on=gv.settings().get_run_on(),
        )

    # If there are no jobs return
    if runs is None or all(run is None for run in runs):
        print("Running solvers done!")
    elif run_on == Runner.SLURM:
        print("Running solvers through Slurm with job id(s): "
              f'{",".join(r.run_id for r in runs if r is not None)}')
    sys.exit(0)
# Script entry point: pass along CLI arguments without the program name.
if __name__ == "__main__":
    main(sys.argv[1:])