Coverage for src/sparkle/CLI/construct_portfolio_selector.py: 13% (135 statements)
coverage.py v7.13.1, created at 2026-01-21 15:31 +0000

#!/usr/bin/env python3
"""Sparkle command to construct a portfolio selector."""

import sys
import argparse

from runrunner.base import Runner

from sparkle.selector import Selector, SelectionScenario
from sparkle.instance import Instance_Set

from sparkle.platform.settings_objects import Settings
from sparkle.structures import PerformanceDataFrame, FeatureDataFrame
from sparkle.types import resolve_objective
from sparkle.CLI.help import global_variables as gv
from sparkle.CLI.help import logging as sl
from sparkle.CLI.help import argparse_custom as ac
from sparkle.CLI.help.nicknames import resolve_object_name, resolve_instance_name
from sparkle.CLI.initialise import check_for_initialise


def parser_function() -> argparse.ArgumentParser:

23 """Define the command line arguments.""" 

24 parser = argparse.ArgumentParser( 

25 description="Command to construct a portfolio selector over all known features " 

26 "solver performances." 

27 ) 

28 parser.add_argument(*ac.SolversArgument.names, **ac.SolversArgument.kwargs) 

29 parser.add_argument( 

30 *ac.RecomputePortfolioSelectorArgument.names, 

31 **ac.RecomputePortfolioSelectorArgument.kwargs, 

32 ) 

33 parser.add_argument(*ac.ObjectiveArgument.names, **ac.ObjectiveArgument.kwargs) 

34 parser.add_argument( 

35 *ac.SelectorAblationArgument.names, **ac.SelectorAblationArgument.kwargs 

36 ) 

37 parser.add_argument( 

38 *ac.InstanceSetTrainOptionalArgument.names, 

39 **ac.InstanceSetTrainOptionalArgument.kwargs, 

40 ) 

41 # Solver Configurations arguments 

42 configuration_group = parser.add_mutually_exclusive_group(required=False) 

43 configuration_group.add_argument( 

44 *ac.AllSolverConfigurationArgument.names, 

45 **ac.AllSolverConfigurationArgument.kwargs, 

46 ) 

47 configuration_group.add_argument( 

48 *ac.BestSolverConfigurationArgument.names, 

49 **ac.BestSolverConfigurationArgument.kwargs, 

50 ) 

51 configuration_group.add_argument( 

52 *ac.DefaultSolverConfigurationArgument.names, 

53 **ac.DefaultSolverConfigurationArgument.kwargs, 

54 ) 

55 # TODO: Allow user to specify configuration ids to use 

56 # Settings arguments 

57 parser.add_argument(*ac.SettingsFileArgument.names, **ac.SettingsFileArgument.kwargs) 

58 parser.add_argument( 

59 *Settings.OPTION_minimum_marginal_contribution.args, 

60 **Settings.OPTION_minimum_marginal_contribution.kwargs, 

61 ) 

62 parser.add_argument(*Settings.OPTION_run_on.args, **Settings.OPTION_run_on.kwargs) 

63 return parser 

64 

65 

66def judge_exist_remaining_jobs( 

67 feature_data: FeatureDataFrame, performance_data: PerformanceDataFrame 

68) -> bool: 

69 """Return whether there are remaining feature or performance computation jobs.""" 

70 missing_features = feature_data.has_missing_vectors() 

71 missing_performances = performance_data.has_missing_values 

72 if missing_features: 

73 print( 

74 "There remain unperformed feature computation jobs! Please run: " 

75 "'sparkle compute features'" 

76 ) 

77 if missing_performances: 

78 print( 

79 "There remain unperformed performance computation jobs! Please run:\n" 

80 "'sparkle cleanup --performance-data'\n" 

81 "to check for missing values in the logs, otherwise run:\n" 

82 "'sparkle run solvers --performance-data'\n" 

83 "to compute missing values." 

84 ) 

85 if missing_features or missing_performances: 

86 print( 

87 "Please first execute all unperformed jobs before constructing Sparkle " 

88 "portfolio selector." 

89 ) 

90 sys.exit(-1) 

91 

92 

93def main(argv: list[str]) -> None: 

94 """Main method of construct portfolio selector.""" 

95 # Define command line arguments 

96 parser = parser_function() 

97 

98 # Process command line arguments 

99 args = parser.parse_args(argv) 

100 settings = gv.settings(args) 

101 

102 # Log command call 

103 sl.log_command(sys.argv, settings.random_state) 

104 check_for_initialise() 

105 

106 flag_recompute_portfolio = args.recompute_portfolio_selector 

107 solver_ablation = args.solver_ablation 

108 
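    # Resolve the objective to optimise; fall back to the first configured objective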

    if args.objective is not None:
        objective = resolve_objective(args.objective)
    else:
        objective = settings.objectives[0]
        print(
            "WARNING: No objective specified, defaulting to first objective from "
            f"settings ({objective})."
        )
    run_on = settings.run_on


119 print("Start constructing Sparkle portfolio selector ...") 

120 if settings.selection_class is None or settings.selection_model is None: 

121 print( 

122 "Selector class or model not set! Values:\n" 

123 f"\t - Class: {settings.selection_class}\n" 

124 f"\t - Model: {settings.selection_model}\n" 

125 "Please specify these values in the sparkle settings." 

126 ) 

127 sys.exit(-1) 

128 
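    # Instantiate the selector from the configured selection class and model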

    selector = Selector(settings.selection_class, settings.selection_model)

    instance_set = None
    if args.instance_set_train is not None:
        instance_set = resolve_object_name(
            args.instance_set_train,
            gv.file_storage_data_mapping[gv.instances_nickname_path],
            gv.settings().DEFAULT_instance_dir,
            Instance_Set,
        )

    solver_cutoff_time = gv.settings().solver_cutoff_time
    extractor_cutoff_time = gv.settings().extractor_cutoff_time

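    # Load the platform's performance and feature data frames from disk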

    performance_data = PerformanceDataFrame(gv.settings().DEFAULT_performance_data_path)
    feature_data = FeatureDataFrame(gv.settings().DEFAULT_feature_data_path)

    # Check that the feature data actually contains features (extractors)
    if feature_data.num_features == 0:
        print(
            "ERROR: Feature data is empty! Please add a feature extractor and run "
            "'sparkle compute features' first."
        )
        sys.exit(-1)

    # Filter objective
    performance_data.remove_objective(
        [obj for obj in performance_data.objective_names if obj != objective.name]
    )
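    # Restrict both data frames to the training instance set, if one was given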

    if instance_set is not None:
        removable_instances = [
            i for i in performance_data.instances if i not in instance_set.instance_names
        ]
        performance_data.remove_instances(removable_instances)
        feature_data.remove_instances(removable_instances)

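    # Restrict to the requested solvers, or default to all solvers in the solver dir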

    if args.solvers is not None:
        solvers = args.solvers
        removable_solvers = [s for s in performance_data.solvers if s not in solvers]
        performance_data.remove_solver(removable_solvers)
    else:
        solvers = sorted(
            [str(s) for s in gv.settings().DEFAULT_solver_dir.iterdir() if s.is_dir()]
        )


    # Check which configurations should be considered
    if args.best_configuration:
        configurations = {
            s: performance_data.best_configuration(s, objective=objective)
            for s in solvers
        }
    elif args.default_configuration:
        configurations = {s: PerformanceDataFrame.default_configuration for s in solvers}
    else:
        configurations = {s: performance_data.get_configurations(s) for s in solvers}
        if not args.all_configurations:  # Take the only configuration
            if any(len(c) > 1 for c in configurations.values()):
                print("ERROR: More than one configuration for the following solvers:")
                for solver, config in configurations.items():
                    if len(config) > 1:
                        print(f"\t{solver}: {len(config)} configurations")
                raise ValueError(
                    "Please set the --all-configurations flag if you wish to use more "
                    "than one configuration per solver."
                )
    for solver in solvers:
        removable_configs = [
            c
            for c in performance_data.get_configurations(solver)
            if c not in configurations[solver]
        ]
        performance_data.remove_configuration(solver, removable_configs)

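    # Abort if feature or performance computation jobs are still outstanding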

    judge_exist_remaining_jobs(feature_data, performance_data)
    if feature_data.has_missing_value():
        print(
            "WARNING: Missing values in the feature data will be imputed as the mean "
            "of all other non-missing values! Imputing all missing values..."
        )
        feature_data.impute_missing_values()


    # Filter solver (configurations) that do not meet the minimum marginal
    # contribution on the training set out of the scenario
    if gv.settings().minimum_marginal_contribution > 0.0:
        print(
            "Filtering out solver (configurations) with marginal contribution < "
            f"{gv.settings().minimum_marginal_contribution} ..."
        )
        for (
            solver,
            config_id,
            marginal_contribution,
            _,
        ) in performance_data.marginal_contribution(objective=objective):
            if marginal_contribution < gv.settings().minimum_marginal_contribution:
                print(f"\tRemoving {solver}, {config_id} [{marginal_contribution}]")
                performance_data.remove_configuration(solver, config_id)

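    # Bundle the selector, objective and filtered data into a selection scenario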

    selection_scenario = SelectionScenario(
        gv.settings().DEFAULT_selection_output,
        selector,
        objective,
        performance_data,
        feature_data,
        solver_cutoff=solver_cutoff_time,
        extractor_cutoff=extractor_cutoff_time,
        ablate=solver_ablation,
    )

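    # An existing selector is only rebuilt when the recompute flag is set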

    if selection_scenario.selector_file_path.exists():
        if not flag_recompute_portfolio:
            print(
                "Portfolio selector already exists. "
                "Set the recompute flag to remove and reconstruct."
            )
            sys.exit(-1)
        # Delete all selectors
        selection_scenario.selector_file_path.unlink(missing_ok=True)
        if selection_scenario.ablation_scenarios:
            for scenario in selection_scenario.ablation_scenarios:
                scenario.selector_file_path.unlink(missing_ok=True)

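    # Submit the construction job, either locally or through Slurm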

    sbatch_options = gv.settings().sbatch_settings
    slurm_prepend = gv.settings().slurm_job_prepend
    selector_run = selector.construct(
        selection_scenario,
        run_on=run_on,
        sbatch_options=sbatch_options,
        slurm_prepend=slurm_prepend,
        base_dir=sl.caller_log_dir,
    )
    jobs = [selector_run]
    if run_on == Runner.LOCAL:
        print("Sparkle portfolio selector constructed!")
    else:
        print("Sparkle portfolio selector constructor running...")


    # Validate the selector by running it on the given instances
    instances = [
        resolve_instance_name(instance, Settings.DEFAULT_instance_dir)
        for instance in performance_data.instances
    ]
    selector_validation = selector.run_cli(
        selection_scenario.scenario_file,
        instances,
        feature_data.csv_filepath,
        run_on=run_on,
        sbatch_options=sbatch_options,
        slurm_prepend=slurm_prepend,
        dependencies=[selector_run],
        log_dir=sl.caller_log_dir,
    )
    jobs.append(selector_validation)

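    # Optionally construct and validate an ablated selector per ablation scenario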

    if solver_ablation:
        for ablated_scenario in selection_scenario.ablation_scenarios:
            # Construct the ablated selector
            ablation_run = selector.construct(
                ablated_scenario,
                run_on=run_on,
                sbatch_options=sbatch_options,
                slurm_prepend=slurm_prepend,
                base_dir=sl.caller_log_dir,
            )
            # Validate the ablated selector
            ablation_validation = selector.run_cli(
                ablated_scenario.scenario_file,
                instances,
                feature_data.csv_filepath,
                run_on=run_on,
                sbatch_options=sbatch_options,
                slurm_prepend=slurm_prepend,
                job_name="Selector Ablation: "
                f"{ablated_scenario.directory.name} on {len(instances)} instances",
                dependencies=[ablation_run],
                log_dir=sl.caller_log_dir,
            )
            jobs.extend([ablation_run, ablation_validation])

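    # Locally, block until all jobs finish; on Slurm, just report the job ids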

    if run_on == Runner.LOCAL:
        for job in jobs:
            job.wait()
        print("Selector validation done!")
    else:
        print(
            "Running selector construction through Slurm with job id(s): "
            f"{', '.join([d.run_id for d in jobs])}"
        )

    # Write used settings to file
    gv.settings().write_used_settings()
    sys.exit(0)


if __name__ == "__main__":
    main(sys.argv[1:])
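
# Example invocation (hypothetical: the command name and most flag spellings are
# inferred from this file's messages and Argument objects; only
# --all-configurations is confirmed by the error text above, so check --help):
#   sparkle construct portfolio selector --objective PAR10 --all-configurations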