Coverage for src/sparkle/CLI/construct_portfolio_selector.py: 13% (135 statements)

#!/usr/bin/env python3
"""Sparkle command to construct a portfolio selector."""
import sys
import argparse

from runrunner.base import Runner

from sparkle.selector import Selector, SelectionScenario
from sparkle.instance import Instance_Set

from sparkle.platform.settings_objects import Settings
from sparkle.structures import PerformanceDataFrame, FeatureDataFrame
from sparkle.types import resolve_objective
from sparkle.CLI.help import global_variables as gv
from sparkle.CLI.help import logging as sl
from sparkle.CLI.help import argparse_custom as ac
from sparkle.CLI.help.nicknames import resolve_object_name, resolve_instance_name
from sparkle.CLI.initialise import check_for_initialise


def parser_function() -> argparse.ArgumentParser:
    """Define the command line arguments."""
    parser = argparse.ArgumentParser(
        description="Command to construct a portfolio selector over all known "
        "features and solver performances."
    )
    parser.add_argument(*ac.SolversArgument.names, **ac.SolversArgument.kwargs)
    parser.add_argument(
        *ac.RecomputePortfolioSelectorArgument.names,
        **ac.RecomputePortfolioSelectorArgument.kwargs,
    )
    parser.add_argument(*ac.ObjectiveArgument.names, **ac.ObjectiveArgument.kwargs)
    parser.add_argument(
        *ac.SelectorAblationArgument.names, **ac.SelectorAblationArgument.kwargs
    )
    parser.add_argument(
        *ac.InstanceSetTrainOptionalArgument.names,
        **ac.InstanceSetTrainOptionalArgument.kwargs,
    )
    # Solver configuration arguments (mutually exclusive)
    configuration_group = parser.add_mutually_exclusive_group(required=False)
    configuration_group.add_argument(
        *ac.AllSolverConfigurationArgument.names,
        **ac.AllSolverConfigurationArgument.kwargs,
    )
    configuration_group.add_argument(
        *ac.BestSolverConfigurationArgument.names,
        **ac.BestSolverConfigurationArgument.kwargs,
    )
    configuration_group.add_argument(
        *ac.DefaultSolverConfigurationArgument.names,
        **ac.DefaultSolverConfigurationArgument.kwargs,
    )
    # TODO: Allow user to specify configuration ids to use
    # Settings arguments
    parser.add_argument(*ac.SettingsFileArgument.names, **ac.SettingsFileArgument.kwargs)
    parser.add_argument(
        *Settings.OPTION_minimum_marginal_contribution.args,
        **Settings.OPTION_minimum_marginal_contribution.kwargs,
    )
    parser.add_argument(*Settings.OPTION_run_on.args, **Settings.OPTION_run_on.kwargs)
    return parser


def judge_exist_remaining_jobs(
    feature_data: FeatureDataFrame, performance_data: PerformanceDataFrame
) -> None:
    """Check for remaining feature or performance computation jobs and exit if any."""
    missing_features = feature_data.has_missing_vectors()
    missing_performances = performance_data.has_missing_values
    if missing_features:
        print(
            "There are unperformed feature computation jobs remaining! Please run: "
            "'sparkle compute features'"
        )
    if missing_performances:
        print(
            "There are unperformed performance computation jobs remaining! "
            "Please run:\n"
            "'sparkle cleanup --performance-data'\n"
            "to check for missing values in the logs, otherwise run:\n"
            "'sparkle run solvers --performance-data'\n"
            "to compute missing values."
        )
    if missing_features or missing_performances:
        print(
            "Please first execute all unperformed jobs before constructing the "
            "Sparkle portfolio selector."
        )
        sys.exit(-1)


def main(argv: list[str]) -> None:
    """Main method of construct portfolio selector."""
    # Define command line arguments
    parser = parser_function()

    # Process command line arguments
    args = parser.parse_args(argv)
    settings = gv.settings(args)

    # Log command call
    sl.log_command(sys.argv, settings.random_state)
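    # Verify that the Sparkle platform has been initialised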
    check_for_initialise()

    flag_recompute_portfolio = args.recompute_portfolio_selector
    solver_ablation = args.solver_ablation

    if args.objective is not None:
        objective = resolve_objective(args.objective)
    else:
        objective = settings.objectives[0]
        print(
            "WARNING: No objective specified, defaulting to first objective from "
            f"settings ({objective})."
        )
    run_on = settings.run_on

    print("Start constructing Sparkle portfolio selector ...")
    if settings.selection_class is None or settings.selection_model is None:
        print(
            "Selector class or model not set! Values:\n"
            f"\t - Class: {settings.selection_class}\n"
            f"\t - Model: {settings.selection_model}\n"
            "Please specify these values in the Sparkle settings."
        )
        sys.exit(-1)
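
    # Build the selector from the configured selector class and model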
    selector = Selector(settings.selection_class, settings.selection_model)

    instance_set = None
    if args.instance_set_train is not None:
        instance_set = resolve_object_name(
            args.instance_set_train,
            gv.file_storage_data_mapping[gv.instances_nickname_path],
            settings.DEFAULT_instance_dir,
            Instance_Set,
        )
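
    # Cutoff budgets that bound each solver run and each feature extraction run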
    solver_cutoff_time = settings.solver_cutoff_time
    extractor_cutoff_time = settings.extractor_cutoff_time

    performance_data = PerformanceDataFrame(settings.DEFAULT_performance_data_path)
    feature_data = FeatureDataFrame(settings.DEFAULT_feature_data_path)

    # Check that the feature data actually contains features (extractors)
    if feature_data.num_features == 0:
        print(
            "ERROR: Feature data is empty! Please add a feature extractor and run "
            "'sparkle compute features' first."
        )
        sys.exit(-1)

    # Keep only the selected objective; the selector is trained for one objective
    performance_data.remove_objective(
        [obj for obj in performance_data.objective_names if obj != objective.name]
    )
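    # Restrict the data to the training instance set, if one was given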
    if instance_set is not None:
        removable_instances = [
            i for i in performance_data.instances if i not in instance_set.instance_names
        ]
        performance_data.remove_instances(removable_instances)
        feature_data.remove_instances(removable_instances)
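
    # Use the requested solvers, or default to every solver in the solver directory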
    if args.solvers is not None:
        solvers = args.solvers
        removable_solvers = [s for s in performance_data.solvers if s not in solvers]
        performance_data.remove_solver(removable_solvers)
    else:
        solvers = sorted(
            [str(s) for s in settings.DEFAULT_solver_dir.iterdir() if s.is_dir()]
        )

    # Check which configurations should be considered
    if args.best_configuration:
        configurations = {
            s: performance_data.best_configuration(s, objective=objective)
            for s in solvers
        }
    elif args.default_configuration:
        configurations = {s: PerformanceDataFrame.default_configuration for s in solvers}
    else:
        configurations = {s: performance_data.get_configurations(s) for s in solvers}
        if not args.all_configurations:  # Take the only configuration
            if any(len(c) > 1 for c in configurations.values()):
                print("ERROR: More than one configuration for the following solvers:")
                for solver, config in configurations.items():
                    if len(config) > 1:
                        print(f"\t{solver}: {len(config)} configurations")
                raise ValueError(
                    "Please set the --all-configurations flag if you wish to use more "
                    "than one configuration per solver."
                )
    for solver in solvers:
        removable_configs = [
            c
            for c in performance_data.get_configurations(solver)
            if c not in configurations[solver]
        ]
        performance_data.remove_configuration(solver, removable_configs)
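
    # Exit early if any feature or performance computation jobs are still pending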
    judge_exist_remaining_jobs(feature_data, performance_data)
    if feature_data.has_missing_value():
        print(
            "WARNING: Missing values in the feature data will be imputed as the mean "
            "of all other non-missing values! Imputing all missing values..."
        )
        feature_data.impute_missing_values()

    # Filter out solver (configuration)s that do not meet the minimum marginal
    # contribution on the training set
    if settings.minimum_marginal_contribution > 0.0:
        print(
            "Filtering out solver (configuration)s with marginal contribution < "
            f"{settings.minimum_marginal_contribution} ..."
        )
        for (
            solver,
            config_id,
            marginal_contribution,
            _,
        ) in performance_data.marginal_contribution(objective=objective):
            if marginal_contribution < settings.minimum_marginal_contribution:
                print(f"\tRemoving {solver}, {config_id} [{marginal_contribution}]")
                performance_data.remove_configuration(solver, config_id)
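
    # Bundle the selector, objective, data and cutoffs into a selection scenario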
    selection_scenario = SelectionScenario(
        settings.DEFAULT_selection_output,
        selector,
        objective,
        performance_data,
        feature_data,
        solver_cutoff=solver_cutoff_time,
        extractor_cutoff=extractor_cutoff_time,
        ablate=solver_ablation,
    )

    if selection_scenario.selector_file_path.exists():
        if not flag_recompute_portfolio:
            print(
                "Portfolio selector already exists. "
                "Set the recompute flag to remove and reconstruct."
            )
            sys.exit(-1)
        # Delete all existing selectors
        selection_scenario.selector_file_path.unlink(missing_ok=True)
        if selection_scenario.ablation_scenarios:
            for scenario in selection_scenario.ablation_scenarios:
                scenario.selector_file_path.unlink(missing_ok=True)
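
    # Slurm scheduler options forwarded to the runner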
    sbatch_options = settings.sbatch_settings
    slurm_prepend = settings.slurm_job_prepend
    selector_run = selector.construct(
        selection_scenario,
        run_on=run_on,
        sbatch_options=sbatch_options,
        slurm_prepend=slurm_prepend,
        base_dir=sl.caller_log_dir,
    )
    jobs = [selector_run]
    if run_on == Runner.LOCAL:
        print("Sparkle portfolio selector constructed!")
    else:
        print("Sparkle portfolio selector constructor running...")

    # Validate the selector on the given instances
    instances = [
        resolve_instance_name(instance, Settings.DEFAULT_instance_dir)
        for instance in performance_data.instances
    ]
    selector_validation = selector.run_cli(
        selection_scenario.scenario_file,
        instances,
        feature_data.csv_filepath,
        run_on=run_on,
        sbatch_options=sbatch_options,
        slurm_prepend=slurm_prepend,
        dependencies=[selector_run],
        log_dir=sl.caller_log_dir,
    )
    jobs.append(selector_validation)
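
    # Optionally construct and validate an ablated selector for each ablation scenario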
    if solver_ablation:
        for ablated_scenario in selection_scenario.ablation_scenarios:
            # Construct the ablated selector
            ablation_run = selector.construct(
                ablated_scenario,
                run_on=run_on,
                sbatch_options=sbatch_options,
                slurm_prepend=slurm_prepend,
                base_dir=sl.caller_log_dir,
            )
            # Validate the ablated selector
            ablation_validation = selector.run_cli(
                ablated_scenario.scenario_file,
                instances,
                feature_data.csv_filepath,
                run_on=run_on,
                sbatch_options=sbatch_options,
                slurm_prepend=slurm_prepend,
                job_name="Selector Ablation: "
                f"{ablated_scenario.directory.name} on {len(instances)} instances",
                dependencies=[ablation_run],
                log_dir=sl.caller_log_dir,
            )
            jobs.extend([ablation_run, ablation_validation])

    if run_on == Runner.LOCAL:
        for job in jobs:
            job.wait()
        print("Selector validation done!")
    else:
        print(
            "Running selector construction through Slurm with job id(s): "
            f"{', '.join([d.run_id for d in jobs])}"
        )

    # Write used settings to file
    settings.write_used_settings()
    sys.exit(0)


if __name__ == "__main__":
    main(sys.argv[1:])