Coverage for sparkle/CLI/construct_portfolio_selector.py: 70%

132 statements  

coverage.py v7.10.7, created at 2025-09-29 10:17 +0000

#!/usr/bin/env python3
"""Sparkle command to construct a portfolio selector."""

import sys
import argparse

from runrunner.base import Runner

from sparkle.selector import Selector, SelectionScenario
from sparkle.instance import Instance_Set

from sparkle.platform.settings_objects import Settings
from sparkle.structures import PerformanceDataFrame, FeatureDataFrame
from sparkle.types import resolve_objective
from sparkle.CLI.help import global_variables as gv
from sparkle.CLI.help import logging as sl
from sparkle.CLI.help import argparse_custom as ac
from sparkle.CLI.help.nicknames import resolve_object_name, resolve_instance_name
from sparkle.CLI.initialise import check_for_initialise

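# Overview of the moving parts, as used below: Selector and SelectionScenario
# drive the actual selector construction, PerformanceDataFrame and
# FeatureDataFrame hold the training data, and runrunner dispatches jobs
# either locally or through Slurm.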

def parser_function() -> argparse.ArgumentParser:
    """Define the command line arguments."""
    parser = argparse.ArgumentParser(
        description="Command to construct a portfolio selector over all known "
        "features and solver performances."
    )
    parser.add_argument(*ac.SolversArgument.names, **ac.SolversArgument.kwargs)
    parser.add_argument(
        *ac.RecomputePortfolioSelectorArgument.names,
        **ac.RecomputePortfolioSelectorArgument.kwargs,
    )
    parser.add_argument(*ac.ObjectiveArgument.names, **ac.ObjectiveArgument.kwargs)
    parser.add_argument(
        *ac.SelectorAblationArgument.names, **ac.SelectorAblationArgument.kwargs
    )
    parser.add_argument(
        *ac.InstanceSetTrainOptionalArgument.names,
        **ac.InstanceSetTrainOptionalArgument.kwargs,
    )
    # Solver configuration arguments (mutually exclusive)
    configuration_group = parser.add_mutually_exclusive_group(required=False)
    configuration_group.add_argument(
        *ac.AllSolverConfigurationArgument.names,
        **ac.AllSolverConfigurationArgument.kwargs,
    )
    configuration_group.add_argument(
        *ac.BestSolverConfigurationArgument.names,
        **ac.BestSolverConfigurationArgument.kwargs,
    )
    configuration_group.add_argument(
        *ac.DefaultSolverConfigurationArgument.names,
        **ac.DefaultSolverConfigurationArgument.kwargs,
    )
    # TODO: Allow user to specify configuration ids to use
    # Settings arguments
    parser.add_argument(*ac.SettingsFileArgument.names, **ac.SettingsFileArgument.kwargs)
    parser.add_argument(
        *Settings.OPTION_minimum_marginal_contribution.args,
        **Settings.OPTION_minimum_marginal_contribution.kwargs,
    )
    parser.add_argument(*Settings.OPTION_run_on.args, **Settings.OPTION_run_on.kwargs)
    return parser

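# A minimal sketch of exercising the parser above in isolation. The flag
# spellings here are hypothetical; the real names are defined by the argument
# objects in sparkle.CLI.help.argparse_custom:
#     parser = parser_function()
#     args = parser.parse_args(["--objective", "PAR10"])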

def judge_exist_remaining_jobs(
    feature_data: FeatureDataFrame, performance_data: PerformanceDataFrame
) -> None:
    """Exit with an error if feature or performance computation jobs remain."""
    missing_features = feature_data.has_missing_vectors()
    missing_performances = performance_data.has_missing_values
    if missing_features:
        print(
            "There are remaining feature computation jobs! Please run: "
            "'sparkle compute features'"
        )
    if missing_performances:
        print(
            "There are remaining performance computation jobs! Please run:\n"
            "'sparkle cleanup --performance-data'\n"
            "to check for missing values in the logs, otherwise run:\n"
            "'sparkle run solvers --performance-data'\n"
            "to compute missing values."
        )
    if missing_features or missing_performances:
        print(
            "Please first execute all unperformed jobs before constructing the "
            "Sparkle portfolio selector."
        )
        sys.exit(-1)


def main(argv: list[str]) -> None:
    """Main method of the construct portfolio selector command."""
    # Define command line arguments
    parser = parser_function()

    # Process command line arguments
    args = parser.parse_args(argv)
    settings = gv.settings(args)

    # Log command call
    sl.log_command(sys.argv, settings.random_state)
    check_for_initialise()

    flag_recompute_portfolio = args.recompute_portfolio_selector
    solver_ablation = args.solver_ablation

    if args.objective is not None:
        objective = resolve_objective(args.objective)
    else:
        objective = settings.objectives[0]
        print(
            "WARNING: No objective specified, defaulting to the first objective "
            f"from the settings ({objective})."
        )
    run_on = settings.run_on


    print("Start constructing Sparkle portfolio selector ...")
    selector = Selector(settings.selection_class, settings.selection_model)

    instance_set = None
    if args.instance_set_train is not None:
        instance_set = resolve_object_name(
            args.instance_set_train,
            gv.file_storage_data_mapping[gv.instances_nickname_path],
            gv.settings().DEFAULT_instance_dir,
            Instance_Set,
        )

    solver_cutoff_time = gv.settings().solver_cutoff_time
    extractor_cutoff_time = gv.settings().extractor_cutoff_time

    performance_data = PerformanceDataFrame(gv.settings().DEFAULT_performance_data_path)
    feature_data = FeatureDataFrame(gv.settings().DEFAULT_feature_data_path)

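    # As used below, the performance data records measured objective values
    # per solver configuration and instance, while the feature data records
    # the extractor feature vectors per instance; both are loaded from the
    # platform's default data paths.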

    # Check that the feature data actually contains features (extractors)
    if feature_data.num_features == 0:
        print(
            "ERROR: Feature data is empty! Please add a feature extractor and run "
            "'sparkle compute features' first."
        )
        sys.exit(-1)

    # Keep only the selected objective in the performance data
    performance_data.remove_objective(
        [obj for obj in performance_data.objective_names if obj != objective.name]
    )
    if instance_set is not None:
        removable_instances = [
            i for i in performance_data.instances if i not in instance_set.instance_names
        ]
        performance_data.remove_instances(removable_instances)
        feature_data.remove_instances(removable_instances)


    if args.solvers is not None:
        solvers = args.solvers
        removable_solvers = [s for s in performance_data.solvers if s not in solvers]
        performance_data.remove_solver(removable_solvers)
    else:
        solvers = sorted(
            [str(s) for s in gv.settings().DEFAULT_solver_dir.iterdir() if s.is_dir()]
        )

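    # Three mutually exclusive modes decide which configurations enter the
    # portfolio: best keeps the best-found configuration per solver, default
    # keeps only each solver's default, and all keeps every known
    # configuration (the corresponding flag spellings are inferred from the
    # argument names and may differ).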

    # Check what configurations should be considered
    if args.best_configuration:
        configurations = {
            s: performance_data.best_configuration(s, objective=objective)
            for s in solvers
        }
    elif args.default_configuration:
        configurations = {s: PerformanceDataFrame.default_configuration for s in solvers}
    else:
        configurations = {s: performance_data.get_configurations(s) for s in solvers}
        if not args.all_configurations:  # Take the only configuration
            if any(len(c) > 1 for c in configurations.values()):
                print("ERROR: More than one configuration for the following solvers:")
                for solver, config in configurations.items():
                    if len(config) > 1:
                        print(f"\t{solver}: {len(config)} configurations")
                raise ValueError(
                    "Please set the --all-configurations flag if you wish to use more "
                    "than one configuration per solver."
                )
    for solver in solvers:
        removable_configs = [
            c
            for c in performance_data.get_configurations(solver)
            if c not in configurations[solver]
        ]
        performance_data.remove_configuration(solver, removable_configs)


    judge_exist_remaining_jobs(feature_data, performance_data)
    if feature_data.has_missing_value():
        print(
            "WARNING: Missing values in the feature data will be imputed as the "
            "mean of all other non-missing values! Imputing all missing values..."
        )
        feature_data.impute_missing_values()

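    # Marginal contribution here is (roughly) how much a solver configuration
    # improves the portfolio's achievable performance on the training set;
    # configurations below the threshold contribute too little to keep.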

    # Filter out solver configurations that do not meet the minimum marginal
    # contribution on the training set
    if gv.settings().minimum_marginal_contribution > 0.0:
        print(
            "Filtering out solver configurations with contribution < "
            f"{gv.settings().minimum_marginal_contribution} ..."
        )
        for (
            solver,
            config_id,
            marginal_contribution,
            _,
        ) in performance_data.marginal_contribution(objective=objective):
            if marginal_contribution < gv.settings().minimum_marginal_contribution:
                print(f"\tRemoving {solver}, {config_id} [{marginal_contribution}]")
                performance_data.remove_configuration(solver, config_id)


    selection_scenario = SelectionScenario(
        gv.settings().DEFAULT_selection_output,
        selector,
        objective,
        performance_data,
        feature_data,
        solver_cutoff=solver_cutoff_time,
        extractor_cutoff=extractor_cutoff_time,
        ablate=solver_ablation,
    )


    if selection_scenario.selector_file_path.exists():
        if not flag_recompute_portfolio:
            print(
                "Portfolio selector already exists. "
                "Set the recompute flag to remove and reconstruct."
            )
            sys.exit(-1)
        # Delete all existing selectors before reconstructing
        selection_scenario.selector_file_path.unlink(missing_ok=True)
        if selection_scenario.ablation_scenarios:
            for scenario in selection_scenario.ablation_scenarios:
                scenario.selector_file_path.unlink(missing_ok=True)


    sbatch_options = gv.settings().sbatch_settings
    slurm_prepend = gv.settings().slurm_job_prepend
    selector_run = selector.construct(
        selection_scenario,
        run_on=run_on,
        sbatch_options=sbatch_options,
        slurm_prepend=slurm_prepend,
        base_dir=sl.caller_log_dir,
    )
    jobs = [selector_run]
    if run_on == Runner.LOCAL:
        print("Sparkle portfolio selector constructed!")
    else:
        print("Sparkle portfolio selector constructor running...")

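    # The validation run below is queued with dependencies=[selector_run], so
    # on a cluster it only starts once the selector itself has been built;
    # locally, all jobs are simply awaited in order at the end.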

    # Validate the selector by running it on the given instances
    instances = [
        resolve_instance_name(instance, Settings.DEFAULT_instance_dir)
        for instance in performance_data.instances
    ]
    selector_validation = selector.run_cli(
        selection_scenario.scenario_file,
        instances,
        feature_data.csv_filepath,
        run_on=run_on,
        sbatch_options=sbatch_options,
        slurm_prepend=slurm_prepend,
        dependencies=[selector_run],
        log_dir=sl.caller_log_dir,
    )
    jobs.append(selector_validation)


    if solver_ablation:
        for ablated_scenario in selection_scenario.ablation_scenarios:
            # Construct the ablated selector
            ablation_run = selector.construct(
                ablated_scenario,
                run_on=run_on,
                sbatch_options=sbatch_options,
                slurm_prepend=slurm_prepend,
                base_dir=sl.caller_log_dir,
            )
            # Validate the ablated selector
            ablation_validation = selector.run_cli(
                ablated_scenario.scenario_file,
                instances,
                feature_data.csv_filepath,
                run_on=run_on,
                sbatch_options=sbatch_options,
                slurm_prepend=slurm_prepend,
                job_name=f"Selector Ablation: {ablated_scenario.directory.name} "
                f"on {len(instances)} instances",
                dependencies=[ablation_run],
                log_dir=sl.caller_log_dir,
            )
            jobs.extend([ablation_run, ablation_validation])


    if run_on == Runner.LOCAL:
        # jobs already includes selector_validation, so waiting on all jobs
        # also covers the validation run
        for job in jobs:
            job.wait()
        print("Selector validation done!")
    else:
        print(
            "Running selector construction through Slurm with job id(s): "
            f"{', '.join([d.run_id for d in jobs])}"
        )

    # Write used settings to file
    gv.settings().write_used_settings()
    sys.exit(0)


if __name__ == "__main__":
    main(sys.argv[1:])
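
# A minimal usage sketch. The flag spelling and objective name below are
# hypothetical; the actual argument names are defined in
# sparkle.CLI.help.argparse_custom:
#     python -m sparkle.CLI.construct_portfolio_selector --objective PAR10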