Coverage for sparkle/CLI/construct_portfolio_selector.py: 70%
132 statements
#!/usr/bin/env python3
"""Sparkle command to construct a portfolio selector."""

import sys
import argparse

from runrunner.base import Runner

from sparkle.selector import Selector, SelectionScenario
from sparkle.instance import Instance_Set

from sparkle.platform.settings_objects import Settings
from sparkle.structures import PerformanceDataFrame, FeatureDataFrame
from sparkle.types import resolve_objective
from sparkle.CLI.help import global_variables as gv
from sparkle.CLI.help import logging as sl
from sparkle.CLI.help import argparse_custom as ac
from sparkle.CLI.help.nicknames import resolve_object_name, resolve_instance_name
from sparkle.CLI.initialise import check_for_initialise

def parser_function() -> argparse.ArgumentParser:
    """Define the command line arguments."""
    parser = argparse.ArgumentParser(
        description="Command to construct a portfolio selector over all known "
        "feature and solver performances."
    )
    parser.add_argument(*ac.SolversArgument.names, **ac.SolversArgument.kwargs)
    parser.add_argument(
        *ac.RecomputePortfolioSelectorArgument.names,
        **ac.RecomputePortfolioSelectorArgument.kwargs,
    )
    parser.add_argument(*ac.ObjectiveArgument.names, **ac.ObjectiveArgument.kwargs)
    parser.add_argument(
        *ac.SelectorAblationArgument.names, **ac.SelectorAblationArgument.kwargs
    )
    parser.add_argument(
        *ac.InstanceSetTrainOptionalArgument.names,
        **ac.InstanceSetTrainOptionalArgument.kwargs,
    )
    # Solver configuration arguments (mutually exclusive)
    configuration_group = parser.add_mutually_exclusive_group(required=False)
    configuration_group.add_argument(
        *ac.AllSolverConfigurationArgument.names,
        **ac.AllSolverConfigurationArgument.kwargs,
    )
    configuration_group.add_argument(
        *ac.BestSolverConfigurationArgument.names,
        **ac.BestSolverConfigurationArgument.kwargs,
    )
    configuration_group.add_argument(
        *ac.DefaultSolverConfigurationArgument.names,
        **ac.DefaultSolverConfigurationArgument.kwargs,
    )
    # TODO: Allow user to specify configuration ids to use
    # Settings arguments
    parser.add_argument(*ac.SettingsFileArgument.names, **ac.SettingsFileArgument.kwargs)
    parser.add_argument(
        *Settings.OPTION_minimum_marginal_contribution.args,
        **Settings.OPTION_minimum_marginal_contribution.kwargs,
    )
    parser.add_argument(*Settings.OPTION_run_on.args, **Settings.OPTION_run_on.kwargs)
    return parser
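
# Example invocation (illustrative sketch only; the exact command and flag names
# are defined in sparkle.CLI.help.argparse_custom and the Settings options, and
# the spellings --objective / --best-configuration / --run-on are assumptions):
#   sparkle construct portfolio selector --objective PAR10 --best-configuration \
#       --run-on slurm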

def judge_exist_remaining_jobs(
    feature_data: FeatureDataFrame, performance_data: PerformanceDataFrame
) -> None:
    """Report remaining feature or performance computation jobs and exit if any."""
    missing_features = feature_data.has_missing_vectors()
    missing_performances = performance_data.has_missing_values
    if missing_features:
        print(
            "There remain unperformed feature computation jobs! Please run: "
            "'sparkle compute features'"
        )
    if missing_performances:
        print(
            "There remain unperformed performance computation jobs! Please run:\n"
            "'sparkle cleanup --performance-data'\n"
            "to check for missing values in the logs, otherwise run:\n"
            "'sparkle run solvers --performance-data'\n"
            "to compute missing values."
        )
    if missing_features or missing_performances:
        print(
            "Please first execute all unperformed jobs before constructing the "
            "Sparkle portfolio selector."
        )
        sys.exit(-1)

def main(argv: list[str]) -> None:
    """Main method of construct portfolio selector."""
    # Define command line arguments
    parser = parser_function()

    # Process command line arguments
    args = parser.parse_args(argv)
    settings = gv.settings(args)

    # Log command call
    sl.log_command(sys.argv, settings.random_state)
    check_for_initialise()

    flag_recompute_portfolio = args.recompute_portfolio_selector
    solver_ablation = args.solver_ablation

    if args.objective is not None:
        objective = resolve_objective(args.objective)
    else:
        objective = settings.objectives[0]
        print(
            "WARNING: No objective specified, defaulting to first objective from "
            f"settings ({objective})."
        )
    run_on = settings.run_on

    print("Start constructing Sparkle portfolio selector ...")
    selector = Selector(settings.selection_class, settings.selection_model)

    instance_set = None
    if args.instance_set_train is not None:
        instance_set = resolve_object_name(
            args.instance_set_train,
            gv.file_storage_data_mapping[gv.instances_nickname_path],
            settings.DEFAULT_instance_dir,
            Instance_Set,
        )

    solver_cutoff_time = settings.solver_cutoff_time
    extractor_cutoff_time = settings.extractor_cutoff_time

    performance_data = PerformanceDataFrame(settings.DEFAULT_performance_data_path)
    feature_data = FeatureDataFrame(settings.DEFAULT_feature_data_path)

    # Check that the feature data actually contains features (extractors)
    if feature_data.num_features == 0:
        print(
            "ERROR: Feature data is empty! Please add a feature extractor and run "
            "'sparkle compute features' first."
        )
        sys.exit(-1)

    # Keep only the selected objective
    performance_data.remove_objective(
        [obj for obj in performance_data.objective_names if obj != objective.name]
    )
    if instance_set is not None:
        removable_instances = [
            i for i in performance_data.instances if i not in instance_set.instance_names
        ]
        performance_data.remove_instances(removable_instances)
        feature_data.remove_instances(removable_instances)

    if args.solvers is not None:
        solvers = args.solvers
        removable_solvers = [s for s in performance_data.solvers if s not in solvers]
        performance_data.remove_solver(removable_solvers)
    else:
        solvers = sorted(
            str(s) for s in settings.DEFAULT_solver_dir.iterdir() if s.is_dir()
        )

    # Check which configurations should be considered
    if args.best_configuration:
        configurations = {
            s: performance_data.best_configuration(s, objective=objective)
            for s in solvers
        }
    elif args.default_configuration:
        configurations = {s: PerformanceDataFrame.default_configuration for s in solvers}
    else:
        configurations = {s: performance_data.get_configurations(s) for s in solvers}
        if not args.all_configurations:  # Take the only configuration
            if any(len(c) > 1 for c in configurations.values()):
                print("ERROR: More than one configuration for the following solvers:")
                for solver, config in configurations.items():
                    if len(config) > 1:
                        print(f"\t{solver}: {config} configurations")
                raise ValueError(
                    "Please set the --all-configurations flag if you wish to use "
                    "more than one configuration per solver."
                )
    for solver in solvers:
        removable_configs = [
            c
            for c in performance_data.get_configurations(solver)
            if c not in configurations[solver]
        ]
        performance_data.remove_configuration(solver, removable_configs)

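    # Abort if any feature or performance computation jobs are still pending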
    judge_exist_remaining_jobs(feature_data, performance_data)
    if feature_data.has_missing_value():
        print(
            "WARNING: Missing values in the feature data will be imputed as the mean "
            "of all other non-missing values! Imputing all missing values..."
        )
        feature_data.impute_missing_values()

    # Filter out solver configurations that do not meet the minimum marginal
    # contribution on the training set
    if settings.minimum_marginal_contribution > 0.0:
        print(
            "Filtering out solver configurations with contribution < "
            f"{settings.minimum_marginal_contribution} ..."
        )
        for (
            solver,
            config_id,
            marginal_contribution,
            _,
        ) in performance_data.marginal_contribution(objective=objective):
            if marginal_contribution < settings.minimum_marginal_contribution:
                print(f"\tRemoving {solver}, {config_id} [{marginal_contribution}]")
                performance_data.remove_configuration(solver, config_id)

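    # Bundle the filtered data into a selection scenario that fixes the output
    # paths, cutoff times and, if requested, the ablation sub-scenarios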
    selection_scenario = SelectionScenario(
        settings.DEFAULT_selection_output,
        selector,
        objective,
        performance_data,
        feature_data,
        solver_cutoff=solver_cutoff_time,
        extractor_cutoff=extractor_cutoff_time,
        ablate=solver_ablation,
    )

    if selection_scenario.selector_file_path.exists():
        if not flag_recompute_portfolio:
            print(
                "Portfolio selector already exists. "
                "Set the recompute flag to remove and reconstruct."
            )
            sys.exit(-1)
        # Delete all selectors
        selection_scenario.selector_file_path.unlink(missing_ok=True)
        if selection_scenario.ablation_scenarios:
            for scenario in selection_scenario.ablation_scenarios:
                scenario.selector_file_path.unlink(missing_ok=True)

    sbatch_options = settings.sbatch_settings
    slurm_prepend = settings.slurm_job_prepend
    selector_run = selector.construct(
        selection_scenario,
        run_on=run_on,
        sbatch_options=sbatch_options,
        slurm_prepend=slurm_prepend,
        base_dir=sl.caller_log_dir,
    )
    jobs = [selector_run]
    if run_on == Runner.LOCAL:
        print("Sparkle portfolio selector constructed!")
    else:
        print("Sparkle portfolio selector constructor running...")

    # Validate the selector by running it on the given instances
    instances = [
        resolve_instance_name(instance, settings.DEFAULT_instance_dir)
        for instance in performance_data.instances
    ]
    selector_validation = selector.run_cli(
        selection_scenario.scenario_file,
        instances,
        feature_data.csv_filepath,
        run_on=run_on,
        sbatch_options=sbatch_options,
        slurm_prepend=slurm_prepend,
        dependencies=[selector_run],
        log_dir=sl.caller_log_dir,
    )
    jobs.append(selector_validation)
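
    # Optionally construct and validate an ablated selector for each ablation
    # scenario produced by the selection scenario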
    if solver_ablation:
        for ablated_scenario in selection_scenario.ablation_scenarios:
            # Construct the ablated selector
            ablation_run = selector.construct(
                ablated_scenario,
                run_on=run_on,
                sbatch_options=sbatch_options,
                slurm_prepend=slurm_prepend,
                base_dir=sl.caller_log_dir,
            )
            # Validate the ablated selector
            ablation_validation = selector.run_cli(
                ablated_scenario.scenario_file,
                instances,
                feature_data.csv_filepath,
                run_on=run_on,
                sbatch_options=sbatch_options,
                slurm_prepend=slurm_prepend,
                job_name="Selector Ablation: "
                f"{ablated_scenario.directory.name} on {len(instances)} instances",
                dependencies=[ablation_run],
                log_dir=sl.caller_log_dir,
            )
            jobs.extend([ablation_run, ablation_validation])

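    # Wait for local runs to finish; on Slurm, just report the submitted job ids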
    if run_on == Runner.LOCAL:
        for job in jobs:
            job.wait()
        print("Selector validation done!")
    else:
        print(
            "Running selector construction through Slurm with job id(s): "
            f"{', '.join([d.run_id for d in jobs])}"
        )

    # Write used settings to file
    settings.write_used_settings()
    sys.exit(0)


if __name__ == "__main__":
    main(sys.argv[1:])