Coverage for sparkle/CLI/construct_portfolio_selector.py: 71%
128 statements
« prev ^ index » next coverage.py v7.9.1, created at 2025-07-01 13:21 +0000
1#!/usr/bin/env python3
2"""Sparkle command to construct a portfolio selector."""
3import sys
4import argparse
5from pathlib import Path
7import runrunner as rrr
8from runrunner.base import Runner
10from sparkle.selector import Selector, SelectionScenario
11from sparkle.instance import Instance_Set
13from sparkle.platform.settings_objects import SettingState
14from sparkle.structures import PerformanceDataFrame, FeatureDataFrame
15from sparkle.types import resolve_objective
16from sparkle.CLI.help import global_variables as gv
17from sparkle.CLI.help import logging as sl
18from sparkle.CLI.help import argparse_custom as ac
19from sparkle.CLI.help.nicknames import resolve_object_name
20from sparkle.CLI.initialise import check_for_initialise
def parser_function() -> argparse.ArgumentParser:
    """Define the command line arguments."""
    parser = argparse.ArgumentParser(
        description="Command to construct a portfolio selector over all known features "
                    "solver performances.")
    # Independent command line options
    for argument in (ac.SolversArgument,
                     ac.RecomputePortfolioSelectorArgument,
                     ac.ObjectiveArgument,
                     ac.SelectorAblationArgument,
                     ac.InstanceSetTrainOptionalArgument,
                     ac.RunOnArgument,
                     ac.SettingsFileArgument):
        parser.add_argument(*argument.names, **argument.kwargs)
    # Solver Configurations arguments: at most one of these may be given
    configuration_group = parser.add_mutually_exclusive_group(required=False)
    for argument in (ac.AllSolverConfigurationArgument,
                     ac.BestSolverConfigurationArgument,
                     ac.DefaultSolverConfigurationArgument):
        configuration_group.add_argument(*argument.names, **argument.kwargs)
    # TODO: Allow user to specify configuration ids to use
    return parser
def judge_exist_remaining_jobs(feature_data: "FeatureDataFrame",
                               performance_data: "PerformanceDataFrame") -> None:
    """Exit the program if feature or performance computation jobs remain.

    Prints which command(s) the user must run first for each kind of missing
    data, then terminates via ``sys.exit(-1)`` if any jobs are outstanding.
    Returns normally (``None``) only when both data frames are complete.
    (The previous ``-> bool`` annotation was incorrect: no value was ever
    returned.)

    Args:
        feature_data: Feature data to check for missing feature vectors.
        performance_data: Performance data to check for missing values.
    """
    # NOTE: has_missing_vectors is a method while has_missing_values is
    # accessed as a property — this mirrors the respective DataFrame APIs.
    missing_features = feature_data.has_missing_vectors()
    missing_performances = performance_data.has_missing_values
    if missing_features:
        print("There remain unperformed feature computation jobs! Please run: "
              "'sparkle compute features'")
    if missing_performances:
        print("There remain unperformed performance computation jobs! Please run: "
              "'sparkle run solvers --performance-data'")
    if missing_features or missing_performances:
        print("Please first execute all unperformed jobs before constructing Sparkle "
              "portfolio selector.")
        sys.exit(-1)
def main(argv: list[str]) -> None:
    """Main method of construct portfolio selector.

    Loads the platform's performance and feature data, filters it down to the
    requested objective, instances, solvers and configurations, then builds a
    portfolio selector (optionally with per-solver ablation scenarios) and
    queues a marginal-contribution computation depending on it.

    Args:
        argv: Command line arguments (without the program name).
    """
    # Log command call
    sl.log_command(sys.argv)
    check_for_initialise()

    # Define command line arguments
    parser = parser_function()

    # Process command line arguments
    args = parser.parse_args(argv)
    flag_recompute_portfolio = args.recompute_portfolio_selector
    solver_ablation = args.solver_ablation

    if args.settings_file is not None:
        # Do first, so other command line options can override settings from the file
        gv.settings().read_settings_ini(args.settings_file, SettingState.CMD_LINE)
    if args.objective is not None:
        objective = resolve_objective(args.objective)
    else:
        # Fall back to the first objective configured in the settings
        objective = gv.settings().get_general_sparkle_objectives()[0]
        print("WARNING: No objective specified, defaulting to first objective from "
              f"settings ({objective}).")
    if args.run_on is not None:
        gv.settings().set_run_on(args.run_on.value, SettingState.CMD_LINE)
    run_on = gv.settings().get_run_on()

    print("Start constructing Sparkle portfolio selector ...")
    selector = Selector(gv.settings().get_selection_class(),
                        gv.settings().get_selection_model())

    # Optionally restrict training to a single instance set (by name or nickname)
    instance_set = None
    if args.instance_set_train is not None:
        instance_set = resolve_object_name(
            args.instance_set_train,
            gv.file_storage_data_mapping[gv.instances_nickname_path],
            gv.settings().DEFAULT_instance_dir, Instance_Set)

    solver_cutoff_time = gv.settings().get_general_solver_cutoff_time()
    extractor_cutoff_time = gv.settings().get_general_extractor_cutoff_time()

    performance_data = PerformanceDataFrame(gv.settings().DEFAULT_performance_data_path)
    feature_data = FeatureDataFrame(gv.settings().DEFAULT_feature_data_path)

    # Check that the feature data actually contains features (extractors)
    if feature_data.num_features == 0:
        print("ERROR: Feature data is empty! Please add a feature extractor and run "
              "'sparkle compute features' first.")
        sys.exit(-1)

    # Filter objective
    performance_data.remove_objective([obj for obj in performance_data.objective_names
                                       if obj != objective.name])
    if instance_set is not None:
        # Drop every instance outside the chosen training set from both frames
        removable_instances = [i for i in performance_data.instances
                               if i not in instance_set.instance_names]
        performance_data.remove_instances(removable_instances)
        feature_data.remove_instances(removable_instances)

    if args.solvers is not None:
        solvers = args.solvers
        removeable_solvers = [s for s in performance_data.solvers if s not in solvers]
        performance_data.remove_solver(removeable_solvers)
    else:
        # Default: consider every solver directory known to the platform
        solvers = sorted([str(s) for s in gv.settings().DEFAULT_solver_dir.iterdir()
                          if s.is_dir()])

    # Check what configurations should be considered
    if args.best_configuration:
        configurations = {s: performance_data.best_configuration(s, objective=objective)
                          for s in solvers}
    elif args.default_configuration:
        configurations = {s: PerformanceDataFrame.default_configuration for s in solvers}
    else:
        configurations = {s: performance_data.get_configurations(s) for s in solvers}
    if not args.all_configurations:  # Take the only configuration
        # Without --all-configurations, exactly one configuration per solver
        # is allowed; ambiguity is a hard error.
        if any(len(c) > 1 for c in configurations.values()):
            print("ERROR: More than one configuration for the following solvers:")
            for solver, config in configurations.items():
                if len(config) > 1:
                    print(f"\t{solver}: {config} configurations")
            raise ValueError(
                "Please set the --all-configurations flag if you wish to use more "
                "than one configuration per solver.")
    # Prune every configuration that was not selected above
    for solver in solvers:
        removeable_configs = [c for c in performance_data.get_configurations(solver)
                              if c not in configurations[solver]]
        performance_data.remove_configuration(solver, removeable_configs)

    judge_exist_remaining_jobs(feature_data, performance_data)
    if feature_data.has_missing_value():
        print("WARNING: Missing values in the feature data, will be imputed as the mean "
              "value of all other non-missing values! Imputing all missing values...")
        feature_data.impute_missing_values()

    selection_scenario = SelectionScenario(gv.settings().DEFAULT_selection_output,
                                           selector,
                                           objective,
                                           performance_data,
                                           feature_data,
                                           solver_cutoff=solver_cutoff_time,
                                           extractor_cutoff=extractor_cutoff_time,
                                           ablate=solver_ablation)

    if selection_scenario.selector_file_path.exists():
        if not flag_recompute_portfolio:
            print("Portfolio selector already exists. "
                  "Set the recompute flag to remove and reconstruct.")
            sys.exit(-1)
        # Delete all selectors
        selection_scenario.selector_file_path.unlink(missing_ok=True)
        if selection_scenario.ablation_scenarios:
            for scenario in selection_scenario.ablation_scenarios:
                scenario.selector_file_path.unlink(missing_ok=True)

    sbatch_options = gv.settings().get_slurm_extra_options(as_args=True)
    slurm_prepend = gv.settings().get_slurm_job_prepend()
    selector_run = selector.construct(selection_scenario,
                                      run_on=run_on,
                                      sbatch_options=sbatch_options,
                                      slurm_prepend=slurm_prepend,
                                      base_dir=sl.caller_log_dir)
    if run_on == Runner.LOCAL:
        print("Sparkle portfolio selector constructed!")
    else:
        print("Sparkle portfolio selector constructor running...")

    dependencies = [selector_run]
    if solver_ablation:
        # NOTE(review): each iteration overwrites selector_run and the ablation
        # runs are never appended to `dependencies`, so the marginal
        # contribution job below will not wait for them — confirm intended.
        for ablated_scenario in selection_scenario.ablation_scenarios:
            selector_run = selector.construct(
                ablated_scenario,
                run_on=run_on,
                sbatch_options=sbatch_options,
                slurm_prepend=slurm_prepend,
                base_dir=sl.caller_log_dir)

    # Compute the marginal contribution
    with_actual = "--actual" if solver_ablation else ""
    cmd = (f"python3 sparkle/CLI/compute_marginal_contribution.py --selection-scenario "
           f"{selection_scenario.scenario_file} --perfect {with_actual}")
    solver_names = ", ".join([Path(s).name for s in performance_data.solvers])
    marginal_contribution = rrr.add_to_queue(
        runner=run_on,
        cmd=cmd,
        name=f"Marginal Contribution computation: {solver_names}",
        base_dir=sl.caller_log_dir,
        dependencies=dependencies,
        sbatch_options=sbatch_options,
        prepend=gv.settings().get_slurm_job_prepend())
    dependencies.append(marginal_contribution)
    if run_on == Runner.LOCAL:
        # Local execution is synchronous: block until computation finishes
        marginal_contribution.wait()
        print("Selector marginal contribution computing done!")
    else:
        print(f"Running selector construction through Slurm with job id(s): "
              f"{', '.join([d.run_id for d in dependencies])}")

    # Write used settings to file
    gv.settings().write_used_settings()
    sys.exit(0)
if __name__ == "__main__":
    # Script entry point: forward CLI arguments (without the program name)
    main(sys.argv[1:])