Coverage for sparkle/CLI/construct_portfolio_selector.py: 71%

128 statements  

coverage.py v7.9.1, created at 2025-07-01 13:21 +0000

#!/usr/bin/env python3
"""Sparkle command to construct a portfolio selector."""
import sys
import argparse
from pathlib import Path

import runrunner as rrr
from runrunner.base import Runner

from sparkle.selector import Selector, SelectionScenario
from sparkle.instance import Instance_Set

from sparkle.platform.settings_objects import SettingState
from sparkle.structures import PerformanceDataFrame, FeatureDataFrame
from sparkle.types import resolve_objective
from sparkle.CLI.help import global_variables as gv
from sparkle.CLI.help import logging as sl
from sparkle.CLI.help import argparse_custom as ac
from sparkle.CLI.help.nicknames import resolve_object_name
from sparkle.CLI.initialise import check_for_initialise

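# A minimal usage sketch; the flag strings below are assumptions for
# illustration only, the authoritative argument definitions live in
# sparkle.CLI.help.argparse_custom:
#     python3 sparkle/CLI/construct_portfolio_selector.py \
#         --objective PAR10 --run-on local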

def parser_function() -> argparse.ArgumentParser:
    """Define the command line arguments."""
    parser = argparse.ArgumentParser(
        description="Command to construct a portfolio selector over all known features "
                    "and solver performances.")
    parser.add_argument(*ac.SolversArgument.names,
                        **ac.SolversArgument.kwargs)
    parser.add_argument(*ac.RecomputePortfolioSelectorArgument.names,
                        **ac.RecomputePortfolioSelectorArgument.kwargs)
    parser.add_argument(*ac.ObjectiveArgument.names,
                        **ac.ObjectiveArgument.kwargs)
    parser.add_argument(*ac.SelectorAblationArgument.names,
                        **ac.SelectorAblationArgument.kwargs)
    parser.add_argument(*ac.InstanceSetTrainOptionalArgument.names,
                        **ac.InstanceSetTrainOptionalArgument.kwargs)
    parser.add_argument(*ac.RunOnArgument.names,
                        **ac.RunOnArgument.kwargs)
    parser.add_argument(*ac.SettingsFileArgument.names,
                        **ac.SettingsFileArgument.kwargs)
    # Solver configuration arguments
    configuration_group = parser.add_mutually_exclusive_group(required=False)
    configuration_group.add_argument(*ac.AllSolverConfigurationArgument.names,
                                     **ac.AllSolverConfigurationArgument.kwargs)
    configuration_group.add_argument(*ac.BestSolverConfigurationArgument.names,
                                     **ac.BestSolverConfigurationArgument.kwargs)
    configuration_group.add_argument(*ac.DefaultSolverConfigurationArgument.names,
                                     **ac.DefaultSolverConfigurationArgument.kwargs)
    # TODO: Allow user to specify configuration ids to use
    return parser


def judge_exist_remaining_jobs(feature_data: FeatureDataFrame,
                               performance_data: PerformanceDataFrame) -> None:
    """Check for remaining feature or performance computation jobs and exit if any exist."""
    missing_features = feature_data.has_missing_vectors()
    missing_performances = performance_data.has_missing_values
    if missing_features:
        print("There remain unperformed feature computation jobs! Please run: "
              "'sparkle compute features'")
    if missing_performances:
        print("There remain unperformed performance computation jobs! Please run: "
              "'sparkle run solvers --performance-data'")
    if missing_features or missing_performances:
        print("Please first execute all unperformed jobs before constructing the "
              "Sparkle portfolio selector.")
        sys.exit(-1)


def main(argv: list[str]) -> None:
    """Main method of the construct portfolio selector command."""
    # Log command call
    sl.log_command(sys.argv)
    check_for_initialise()

    # Define command line arguments
    parser = parser_function()

    # Process command line arguments
    args = parser.parse_args(argv)
    flag_recompute_portfolio = args.recompute_portfolio_selector
    solver_ablation = args.solver_ablation

    if args.settings_file is not None:
        # Do first, so other command line options can override settings from the file
        gv.settings().read_settings_ini(args.settings_file, SettingState.CMD_LINE)
    if args.objective is not None:
        objective = resolve_objective(args.objective)
    else:
        objective = gv.settings().get_general_sparkle_objectives()[0]
        print("WARNING: No objective specified, defaulting to first objective from "
              f"settings ({objective}).")
    if args.run_on is not None:
        gv.settings().set_run_on(args.run_on.value, SettingState.CMD_LINE)
    run_on = gv.settings().get_run_on()

    print("Start constructing the Sparkle portfolio selector ...")
    selector = Selector(gv.settings().get_selection_class(),
                        gv.settings().get_selection_model())

    instance_set = None
    if args.instance_set_train is not None:
        instance_set = resolve_object_name(
            args.instance_set_train,
            gv.file_storage_data_mapping[gv.instances_nickname_path],
            gv.settings().DEFAULT_instance_dir, Instance_Set)

    solver_cutoff_time = gv.settings().get_general_solver_cutoff_time()
    extractor_cutoff_time = gv.settings().get_general_extractor_cutoff_time()

    performance_data = PerformanceDataFrame(gv.settings().DEFAULT_performance_data_path)
    feature_data = FeatureDataFrame(gv.settings().DEFAULT_feature_data_path)

    # Check that the feature data actually contains features (extractors)
    if feature_data.num_features == 0:
        print("ERROR: Feature data is empty! Please add a feature extractor and run "
              "'sparkle compute features' first.")
        sys.exit(-1)

    # Filter objective
    performance_data.remove_objective([obj for obj in performance_data.objective_names
                                       if obj != objective.name])
    if instance_set is not None:
        removable_instances = [i for i in performance_data.instances
                               if i not in instance_set.instance_names]
        performance_data.remove_instances(removable_instances)
        feature_data.remove_instances(removable_instances)

    if args.solvers is not None:
        solvers = args.solvers
        removable_solvers = [s for s in performance_data.solvers if s not in solvers]
        performance_data.remove_solver(removable_solvers)
    else:
        solvers = sorted([str(s) for s in gv.settings().DEFAULT_solver_dir.iterdir()
                          if s.is_dir()])

    # Check which configurations should be considered
    if args.best_configuration:
        configurations = {s: performance_data.best_configuration(s, objective=objective)
                          for s in solvers}
    elif args.default_configuration:
        configurations = {s: PerformanceDataFrame.default_configuration for s in solvers}
    else:
        configurations = {s: performance_data.get_configurations(s) for s in solvers}
        if not args.all_configurations:  # Take the only configuration
            if any(len(c) > 1 for c in configurations.values()):
                print("ERROR: More than one configuration for the following solvers:")
                for solver, config in configurations.items():
                    if len(config) > 1:
                        print(f"\t{solver}: {len(config)} configurations")
                raise ValueError(
                    "Please set the --all-configurations flag if you wish to use more "
                    "than one configuration per solver.")
    for solver in solvers:
        removable_configs = [c for c in performance_data.get_configurations(solver)
                             if c not in configurations[solver]]
        performance_data.remove_configuration(solver, removable_configs)

    judge_exist_remaining_jobs(feature_data, performance_data)
    if feature_data.has_missing_value():
        print("WARNING: Missing values in the feature data will be imputed as the "
              "mean of the non-missing values! Imputing all missing values ...")
        feature_data.impute_missing_values()

    selection_scenario = SelectionScenario(gv.settings().DEFAULT_selection_output,
                                           selector,
                                           objective,
                                           performance_data,
                                           feature_data,
                                           solver_cutoff=solver_cutoff_time,
                                           extractor_cutoff=extractor_cutoff_time,
                                           ablate=solver_ablation)

    if selection_scenario.selector_file_path.exists():
        if not flag_recompute_portfolio:
            print("Portfolio selector already exists. "
                  "Set the recompute flag to remove and reconstruct.")
            sys.exit(-1)
        # Delete all selectors
        selection_scenario.selector_file_path.unlink(missing_ok=True)
        if selection_scenario.ablation_scenarios:
            for scenario in selection_scenario.ablation_scenarios:
                scenario.selector_file_path.unlink(missing_ok=True)

    sbatch_options = gv.settings().get_slurm_extra_options(as_args=True)
    slurm_prepend = gv.settings().get_slurm_job_prepend()
    selector_run = selector.construct(selection_scenario,
                                      run_on=run_on,
                                      sbatch_options=sbatch_options,
                                      slurm_prepend=slurm_prepend,
                                      base_dir=sl.caller_log_dir)
    if run_on == Runner.LOCAL:
        print("Sparkle portfolio selector constructed!")
    else:
        print("Sparkle portfolio selector construction running ...")

    dependencies = [selector_run]
    if solver_ablation:
        for ablated_scenario in selection_scenario.ablation_scenarios:
            ablation_run = selector.construct(
                ablated_scenario,
                run_on=run_on,
                sbatch_options=sbatch_options,
                slurm_prepend=slurm_prepend,
                base_dir=sl.caller_log_dir)
            # Track the ablation runs so later jobs wait for them as well
            dependencies.append(ablation_run)

    # Compute the marginal contribution
    with_actual = "--actual" if solver_ablation else ""
    cmd = (f"python3 sparkle/CLI/compute_marginal_contribution.py --selection-scenario "
           f"{selection_scenario.scenario_file} --perfect {with_actual}")
    solver_names = ", ".join([Path(s).name for s in performance_data.solvers])
    marginal_contribution = rrr.add_to_queue(
        runner=run_on,
        cmd=cmd,
        name=f"Marginal Contribution computation: {solver_names}",
        base_dir=sl.caller_log_dir,
        dependencies=dependencies,
        sbatch_options=sbatch_options,
        prepend=slurm_prepend)
    dependencies.append(marginal_contribution)
    if run_on == Runner.LOCAL:
        marginal_contribution.wait()
        print("Selector marginal contribution computation done!")
    else:
        print(f"Running selector construction through Slurm with job id(s): "
              f"{', '.join([d.run_id for d in dependencies])}")
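    # For reference, the command queued above renders to roughly the following
    # (the scenario path is a placeholder; "--actual" is appended only when
    # solver ablation is enabled):
    #     python3 sparkle/CLI/compute_marginal_contribution.py \
    #         --selection-scenario <scenario_file> --perfect --actual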

    # Write used settings to file
    gv.settings().write_used_settings()
    sys.exit(0)


if __name__ == "__main__":
    main(sys.argv[1:])