Coverage for sparkle/CLI/run_solvers.py: 88% (85 statements)
#!/usr/bin/env python3
"""Sparkle command to run solvers to get their performance data."""
from __future__ import annotations

import sys
import argparse
from pathlib import PurePath, Path

import runrunner as rrr
from runrunner.base import Runner, Run

from sparkle.CLI.help import global_variables as gv
from sparkle.structures import PerformanceDataFrame
from sparkle.CLI.help import logging as sl
from sparkle.platform.settings_objects import Settings, SettingState
from sparkle.platform import CommandName, COMMAND_DEPENDENCIES
from sparkle.CLI.initialise import check_for_initialise
from sparkle.CLI.help import argparse_custom as ac


def parser_function() -> argparse.ArgumentParser:
    """Define the command line arguments."""
    parser = argparse.ArgumentParser(
        description="Run all solvers on all instances to get their performance data.")
    parser.add_argument(*ac.RecomputeRunSolversArgument.names,
                        **ac.RecomputeRunSolversArgument.kwargs)
    parser.add_argument(*ac.SparkleObjectiveArgument.names,
                        **ac.SparkleObjectiveArgument.kwargs)
    parser.add_argument(*ac.TargetCutOffTimeRunSolversArgument.names,
                        **ac.TargetCutOffTimeRunSolversArgument.kwargs)
    parser.add_argument(*ac.AlsoConstructSelectorAndReportArgument.names,
                        **ac.AlsoConstructSelectorAndReportArgument.kwargs)
    parser.add_argument(*ac.RunOnArgument.names,
                        **ac.RunOnArgument.kwargs)
    parser.add_argument(*ac.SettingsFileArgument.names,
                        **ac.SettingsFileArgument.kwargs)
    return parser
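

# Example invocation (an illustrative sketch; the exact flag spellings are
# defined by the argument objects in sparkle.CLI.help.argparse_custom, so the
# names below are assumptions):
#     python3 run_solvers.py --recompute --run-on slurm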


def running_solvers_performance_data(
        performance_data_csv_path: Path,
        num_job_in_parallel: int,
        rerun: bool = False,
        run_on: Runner = Runner.SLURM) -> Run:
    """Run the solvers for the performance data.

    Parameters
    ----------
    performance_data_csv_path: Path
        The path to the performance data file
    num_job_in_parallel: int
        The maximum number of jobs to run in parallel
    rerun: bool
        Run only solvers for which no data is available yet (False) or (re)run all
        solvers to get (new) performance data for them (True)
    run_on: Runner
        Where to execute the solvers. For available values see the
        runrunner.base.Runner enum. Default: Runner.SLURM.

    Returns
    -------
    run: runrunner.LocalRun or runrunner.SlurmRun
        The Run object with information concerning the submitted jobs, or None if
        there were no jobs to run.
    """
    # Open the performance data csv file
    performance_dataframe = PerformanceDataFrame(performance_data_csv_path)
    # List of jobs to do
    jobs = performance_dataframe.get_job_list(rerun=rerun)
    num_jobs = len(jobs)

    print("Cutoff time for each solver run: "
          f"{gv.settings().get_general_target_cutoff_time()} seconds")
    print(f"Total number of jobs to run: {num_jobs}")

    # If there are no jobs, stop
    if num_jobs == 0:
        return None

    if run_on == Runner.LOCAL:
        print("Running the solvers locally")
    elif run_on == Runner.SLURM:
        print("Running the solvers through Slurm")

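    # Compose the Slurm options: user-configured sbatch options from the settings,
    # plus srun flags pinning each solver run to a single node (-N1) and a single
    # task (-n1).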
    sbatch_options = gv.settings().get_slurm_extra_options(as_args=True)
    srun_options = ["-N1", "-n1"] + sbatch_options
    objectives = gv.settings().get_general_sparkle_objectives()
    run_solvers_core = Path(__file__).parent.resolve() / "core" / "run_solvers_core.py"
    cmd_list = [f"{run_solvers_core} "
                f"--performance-data {performance_data_csv_path} "
                f"--instance {inst_p} --solver {solver_p} "
                f"--objectives {','.join([str(o) for o in objectives])} "
                f"--log-dir {sl.caller_log_dir}" for inst_p, _, solver_p in jobs]

    run = rrr.add_to_queue(
        runner=run_on,
        cmd=cmd_list,
        parallel_jobs=num_job_in_parallel,
        name=CommandName.RUN_SOLVERS,
        base_dir=sl.caller_log_dir,
        sbatch_options=sbatch_options,
        srun_options=srun_options)

    if run_on == Runner.LOCAL:
        # TODO: It would be nice to extract some info per job and print it,
        # as the user now only sees jobs starting and completing without their results
        run.wait()

    return run
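

# Example use of running_solvers_performance_data (an illustrative sketch; the
# CSV path and parallelism value below are assumptions, not project defaults):
#     run = running_solvers_performance_data(
#         performance_data_csv_path=Path("performance_data.csv"),
#         num_job_in_parallel=8,
#         run_on=Runner.LOCAL)
#     if run is not None:
#         run.wait()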


def run_solvers_on_instances(
        recompute: bool = False,
        run_on: Runner = Runner.SLURM,
        also_construct_selector_and_report: bool = False) -> None:
    """Run all the solvers on all the instances that were not previously run.

    If recompute is True, rerun everything even if previously run. Where the solvers
    are executed can be controlled with "run_on".

    Parameters
    ----------
    recompute: bool
        If True, recompute all solver-instance pairs even if they were run before.
        Default: False
    run_on: Runner
        On which computer or cluster environment to run the solvers.
        Available: Runner.LOCAL, Runner.SLURM. Default: Runner.SLURM
    also_construct_selector_and_report: bool
        If True, the selector will be constructed and a report will be produced.
    """
    if recompute:
        PerformanceDataFrame(gv.settings().DEFAULT_performance_data_path).clean_csv()
    num_job_in_parallel = gv.settings().get_number_of_jobs_in_parallel()

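    # Collect all Run handles in one list so follow-up jobs can be chained after
    # them and awaited together.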
    runs = [running_solvers_performance_data(
        performance_data_csv_path=gv.settings().DEFAULT_performance_data_path,
        num_job_in_parallel=num_job_in_parallel,
        rerun=recompute,
        run_on=run_on)]

    # If there are no jobs, return
    if all(run is None for run in runs):
        print("Running solvers done!")
        return

    sbatch_user_options = gv.settings().get_slurm_extra_options(as_args=True)
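    # Chain the follow-up jobs: selector construction depends on the solver runs,
    # and report generation depends on whichever job was queued last (runs[-1]).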
    if also_construct_selector_and_report:
        runs.append(rrr.add_to_queue(
            runner=run_on,
            cmd="sparkle/CLI/construct_portfolio_selector.py",
            name=CommandName.CONSTRUCT_PORTFOLIO_SELECTOR,
            dependencies=runs[-1],
            base_dir=sl.caller_log_dir,
            sbatch_options=sbatch_user_options))

        runs.append(rrr.add_to_queue(
            runner=run_on,
            cmd="sparkle/CLI/generate_report.py",
            name=CommandName.GENERATE_REPORT,
            dependencies=runs[-1],
            base_dir=sl.caller_log_dir,
            sbatch_options=sbatch_user_options))

    if run_on == Runner.LOCAL:
        print("Waiting for the local calculations to finish.")
        for run in runs:
            if run is not None:
                run.wait()
        print("Running solvers done!")
    elif run_on == Runner.SLURM:
        print("Running solvers. Waiting for Slurm job(s) with id(s): "
              f'{",".join(r.run_id for r in runs if r is not None)}')


def main(argv: list[str]) -> None:
    """Main function of the run solvers command."""
    # Log command call
    sl.log_command(sys.argv)

    # Define command line arguments
    parser = parser_function()

    # Process command line arguments
    args = parser.parse_args(argv)

    if args.settings_file is not None:
        # Do first, so other command line options can override settings from the file
        gv.settings().read_settings_ini(args.settings_file, SettingState.CMD_LINE)
    if args.objectives is not None:
        gv.settings().set_general_sparkle_objectives(
            args.objectives, SettingState.CMD_LINE)
    if args.target_cutoff_time is not None:
        gv.settings().set_general_target_cutoff_time(
            args.target_cutoff_time, SettingState.CMD_LINE)
    if args.run_on is not None:
        gv.settings().set_run_on(
            args.run_on.value, SettingState.CMD_LINE)

    check_for_initialise(COMMAND_DEPENDENCIES[CommandName.RUN_SOLVERS])

    # Compare current settings to latest.ini
    prev_settings = Settings(PurePath("Settings/latest.ini"))
    Settings.check_settings_changes(gv.settings(), prev_settings)

207 print("Start running solvers ...")
209 # Write settings to file before starting, since they are used in callback scripts
210 gv.settings().write_used_settings()
212 run_on = gv.settings().get_run_on()
213 run_solvers_on_instances(
214 recompute=args.recompute,
215 also_construct_selector_and_report=args.also_construct_selector_and_report,
216 run_on=run_on)
217 sys.exit(0)


if __name__ == "__main__":
    main(sys.argv[1:])