Coverage for sparkle/CLI/run_solvers.py: 88%

85 statements  

« prev     ^ index     » next       coverage.py v7.6.4, created at 2024-11-05 14:48 +0000

1#!/usr/bin/env python3 

2"""Sparkle command to run solvers to get their performance data.""" 

3from __future__ import annotations 

4 

5import sys 

6import argparse 

7from pathlib import PurePath, Path 

8 

9import runrunner as rrr 

10from runrunner.base import Runner, Run 

11 

12from sparkle.CLI.help import global_variables as gv 

13from sparkle.structures import PerformanceDataFrame 

14from sparkle.CLI.help import logging as sl 

15from sparkle.platform.settings_objects import Settings, SettingState 

16from sparkle.platform import CommandName, COMMAND_DEPENDENCIES 

17from sparkle.CLI.initialise import check_for_initialise 

18from sparkle.CLI.help import argparse_custom as ac 

19 

20 

def parser_function() -> argparse.ArgumentParser:
    """Construct the argument parser for the run-solvers command.

    Returns
    -------
    parser: argparse.ArgumentParser
        Parser accepting the recompute, objectives, cutoff-time,
        selector-and-report, run-on and settings-file arguments.
    """
    parser = argparse.ArgumentParser(
        description="Run all solvers on all instances to get their performance data.")
    # Each argument descriptor in argparse_custom bundles its names and kwargs;
    # register them in the same order the command documents them.
    for argument in (ac.RecomputeRunSolversArgument,
                     ac.SparkleObjectiveArgument,
                     ac.TargetCutOffTimeRunSolversArgument,
                     ac.AlsoConstructSelectorAndReportArgument,
                     ac.RunOnArgument,
                     ac.SettingsFileArgument):
        parser.add_argument(*argument.names, **argument.kwargs)
    return parser

38 

39 

def running_solvers_performance_data(
        performance_data_csv_path: Path,
        num_job_in_parallel: int,
        rerun: bool = False,
        run_on: Runner = Runner.SLURM) -> Run | None:
    """Run the solvers for the performance data.

    Parameters
    ----------
    performance_data_csv_path: Path
        The path to the performance data file
    num_job_in_parallel: int
        The maximum number of jobs to run in parallel
    rerun: bool
        Run only solvers for which no data is available yet (False) or (re)run all
        solvers to get (new) performance data for them (True)
    run_on: Runner
        Where to execute the solvers. For available values see runrunner.base.Runner
        enum. Default: "Runner.SLURM".

    Returns
    -------
    run: runrunner.LocalRun, runrunner.SlurmRun or None
        The Run object describing the submitted solver jobs, or None when there
        are no jobs left to run.
    """
    # Open the performance data csv file
    performance_dataframe = PerformanceDataFrame(performance_data_csv_path)
    # Jobs are (instance_path, _, solver_path) tuples still missing data
    # (or all pairs when rerun=True).
    jobs = performance_dataframe.get_job_list(rerun=rerun)
    num_jobs = len(jobs)

    print("Cutoff time for each solver run: "
          f"{gv.settings().get_general_target_cutoff_time()} seconds")
    print(f"Total number of jobs to run: {num_jobs}")

    # If there are no jobs, stop (callers treat None as "nothing scheduled")
    if num_jobs == 0:
        return None

    if run_on == Runner.LOCAL:
        print("Running the solvers locally")
    elif run_on == Runner.SLURM:
        print("Running the solvers through Slurm")

    sbatch_options = gv.settings().get_slurm_extra_options(as_args=True)
    # Each job is a single task on a single node; user sbatch options also
    # apply to the individual srun steps.
    srun_options = ["-N1", "-n1"] + sbatch_options
    objectives = gv.settings().get_general_sparkle_objectives()
    # One command line per (instance, solver) pair, executed by the core script
    run_solvers_core = Path(__file__).parent.resolve() / "core" / "run_solvers_core.py"
    cmd_list = [f"{run_solvers_core} "
                f"--performance-data {performance_data_csv_path} "
                f"--instance {inst_p} --solver {solver_p} "
                f"--objectives {','.join([str(o) for o in objectives])} "
                f"--log-dir {sl.caller_log_dir}" for inst_p, _, solver_p in jobs]

    run = rrr.add_to_queue(
        runner=run_on,
        cmd=cmd_list,
        parallel_jobs=num_job_in_parallel,
        name=CommandName.RUN_SOLVERS,
        base_dir=sl.caller_log_dir,
        sbatch_options=sbatch_options,
        srun_options=srun_options)

    if run_on == Runner.LOCAL:
        # TODO: It would be nice to extract some info per job and print it
        # As the user now only sees jobs starting and completing without their results
        run.wait()

    return run

110 

111 

def run_solvers_on_instances(
        recompute: bool = False,
        run_on: Runner = Runner.SLURM,
        also_construct_selector_and_report: bool = False) -> None:
    """Run all the solvers on all the instances that were not not previously run.

    If recompute is True, rerun everything even if previously run. Where the solvers are
    executed can be controlled with "run_on".

    Parameters
    ----------
    recompute: bool
        If True, recompute all solver-instance pairs even if they were run before.
        Default: False
    run_on: Runner
        On which computer or cluster environment to run the solvers.
        Available: Runner.LOCAL, Runner.SLURM. Default: Runner.SLURM
    also_construct_selector_and_report: bool
        If True, the selector will be constructed and a report will be produced.
    """
    performance_data_path = gv.settings().DEFAULT_performance_data_path
    if recompute:
        # Wipe existing results so every pair is recomputed from scratch.
        PerformanceDataFrame(performance_data_path).clean_csv()
    parallel_jobs = gv.settings().get_number_of_jobs_in_parallel()

    runs = [running_solvers_performance_data(
        performance_data_csv_path=performance_data_path,
        num_job_in_parallel=parallel_jobs,
        rerun=recompute,
        run_on=run_on)]

    # A None entry means no solver jobs were scheduled; nothing to wait for.
    if all(run is None for run in runs):
        print("Running solvers done!")
        return

    sbatch_user_options = gv.settings().get_slurm_extra_options(as_args=True)
    # Optional follow-up stages, each chained onto the most recent run.
    follow_up_cmds = []
    if also_construct_selector_and_report:
        follow_up_cmds.append(("sparkle/CLI/construct_portfolio_selector.py",
                               CommandName.CONSTRUCT_PORTFOLIO_SELECTOR))
    follow_up_cmds.append(("sparkle/CLI/generate_report.py",
                           CommandName.GENERATE_REPORT))
    for cmd, cmd_name in follow_up_cmds:
        runs.append(rrr.add_to_queue(
            runner=run_on,
            cmd=cmd,
            name=cmd_name,
            dependencies=runs[-1],
            base_dir=sl.caller_log_dir,
            sbatch_options=sbatch_user_options))

    if run_on == Runner.LOCAL:
        print("Waiting for the local calculations to finish.")
        for run in runs:
            if run is not None:
                run.wait()
        print("Running solvers done!")
    elif run_on == Runner.SLURM:
        job_ids = ",".join(r.run_id for r in runs if r is not None)
        print("Running solvers. Waiting for Slurm job(s) with id(s): "
              f"{job_ids}")

174 

175 

def main(argv: list[str]) -> None:
    """Main function of the run solvers command."""
    # Log command call (the full invocation, not just the given argv slice)
    sl.log_command(sys.argv)

    # Build the parser and process the given command line arguments
    args = parser_function().parse_args(argv)

    if args.settings_file is not None:
        # Do first, so other command line options can override settings from the file
        gv.settings().read_settings_ini(args.settings_file, SettingState.CMD_LINE)
    if args.objectives is not None:
        gv.settings().set_general_sparkle_objectives(
            args.objectives, SettingState.CMD_LINE)
    if args.target_cutoff_time is not None:
        gv.settings().set_general_target_cutoff_time(
            args.target_cutoff_time, SettingState.CMD_LINE)
    if args.run_on is not None:
        gv.settings().set_run_on(args.run_on.value, SettingState.CMD_LINE)

    check_for_initialise(COMMAND_DEPENDENCIES[CommandName.RUN_SOLVERS])

    # Compare current settings to latest.ini
    previous_settings = Settings(PurePath("Settings/latest.ini"))
    Settings.check_settings_changes(gv.settings(), previous_settings)

    print("Start running solvers ...")

    # Write settings to file before starting, since they are used in callback scripts
    gv.settings().write_used_settings()

    run_solvers_on_instances(
        recompute=args.recompute,
        also_construct_selector_and_report=args.also_construct_selector_and_report,
        run_on=gv.settings().get_run_on())
    sys.exit(0)

218 

219 

# Script entry point: pass the CLI arguments (without the program name) to main.
if __name__ == "__main__":
    main(sys.argv[1:])