Coverage for sparkle/CLI/construct_portfolio_selector.py: 72%
107 statements
« prev ^ index » next coverage.py v7.8.0, created at 2025-04-03 10:42 +0000
1#!/usr/bin/env python3
2"""Sparkle command to construct a portfolio selector."""
3import sys
4import argparse
5from pathlib import Path
7import runrunner as rrr
8from runrunner.base import Runner
10from sparkle.solver import Selector
11from sparkle.platform.settings_objects import SettingState
12from sparkle.structures import PerformanceDataFrame, FeatureDataFrame
13from sparkle.types import resolve_objective
14from sparkle.CLI.help import global_variables as gv
15from sparkle.CLI.help import logging as sl
16from sparkle.CLI.help import argparse_custom as ac
17from sparkle.CLI.help.reporting_scenario import Scenario
18from sparkle.CLI.initialise import check_for_initialise
def parser_function() -> argparse.ArgumentParser:
    """Build and return the argument parser for this command."""
    parser = argparse.ArgumentParser(
        description="Command to construct a portfolio selector over all known features "
        "solver performances.")
    # Register each CLI argument from its shared descriptor object.
    cli_arguments = (ac.RecomputePortfolioSelectorArgument,
                     ac.ObjectiveArgument,
                     ac.SelectorAblationArgument,
                     ac.RunOnArgument,
                     ac.SettingsFileArgument)
    for cli_argument in cli_arguments:
        parser.add_argument(*cli_argument.names, **cli_argument.kwargs)
    return parser
def judge_exist_remaining_jobs(feature_data_csv: Path,
                               performance_data_csv: Path) -> None:
    """Exit the process if feature or performance computation jobs remain.

    Loads the feature and performance data frames and checks both for
    missing entries. If either has unfinished jobs, an explanatory message
    is printed and the process terminates with exit code -1; otherwise the
    function returns normally.

    NOTE(review): the original signature was annotated ``-> bool`` and the
    docstring claimed a boolean return, but no value was ever returned.
    The annotation now matches the actual behavior.

    Args:
        feature_data_csv: Path to the feature data file.
        performance_data_csv: Path to the performance data file.
    """
    feature_data = FeatureDataFrame(feature_data_csv)
    performance_data = PerformanceDataFrame(performance_data_csv)
    # NOTE(review): feature data exposes missing-ness as a method, while
    # performance data exposes it as a property — mirrors the original code.
    missing_features = feature_data.has_missing_vectors()
    missing_performances = performance_data.has_missing_values
    if missing_features:
        print("There remain unperformed feature computation jobs!")
    if missing_performances:
        print("There remain unperformed performance computation jobs!")
    if missing_features or missing_performances:
        print("Please first execute all unperformed jobs before constructing Sparkle "
              "portfolio selector")
        print("Sparkle portfolio selector is not successfully constructed!")
        sys.exit(-1)
def main(argv: list[str]) -> None:
    """Main method of construct portfolio selector.

    Parses the command line arguments, verifies all feature/performance
    computation jobs are done, constructs the portfolio selector (plus,
    optionally, one ablated selector per solver), queues the marginal
    contribution computation, records used settings/scenario, and exits.
    """
    # Log command call
    sl.log_command(sys.argv)
    check_for_initialise()

    # Define command line arguments
    parser = parser_function()

    # Process command line arguments
    args = parser.parse_args(argv)
    flag_recompute_portfolio = args.recompute_portfolio_selector
    solver_ablation = args.solver_ablation

    if ac.set_by_user(args, "settings_file"):
        gv.settings().read_settings_ini(
            args.settings_file, SettingState.CMD_LINE
        )  # Do first, so other command line options can override settings from the file
    if ac.set_by_user(args, "objective"):
        objective = resolve_objective(args.objective)
    else:
        # No explicit objective: fall back to the first configured one.
        objective = gv.settings().get_general_sparkle_objectives()[0]
        print("WARNING: No objective specified, defaulting to first objective from "
              f"settings ({objective}).")
    if args.run_on is not None:
        gv.settings().set_run_on(
            args.run_on.value, SettingState.CMD_LINE)
    run_on = gv.settings().get_run_on()

    print("Start constructing Sparkle portfolio selector ...")
    selector = Selector(gv.settings().get_selection_class(),
                        gv.settings().get_selection_model())

    # Exits the process (code -1) if any feature/performance jobs remain.
    judge_exist_remaining_jobs(
        gv.settings().DEFAULT_feature_data_path,
        gv.settings().DEFAULT_performance_data_path)

    cutoff_time = gv.settings().get_general_target_cutoff_time()

    performance_data = PerformanceDataFrame(gv.settings().DEFAULT_performance_data_path)
    feature_data = FeatureDataFrame(gv.settings().DEFAULT_feature_data_path)

    if feature_data.has_missing_value():
        print("WARNING: Missing values in the feature data, will be imputed as the mean "
              "value of all other non-missing values! Imputing all missing values...")
        feature_data.impute_missing_values()

    # TODO: Allow user to specify subsets of data to be used

    # Selector is named after the solvers it can predict, sort for permutation invariance
    solvers = sorted([s.name for s in gv.settings().DEFAULT_solver_dir.iterdir()])
    selection_scenario_path =\
        gv.settings().DEFAULT_selection_output / selector.name / "_".join(solvers)

    # Update latest scenario
    gv.latest_scenario().set_selection_scenario_path(selection_scenario_path)
    gv.latest_scenario().set_latest_scenario(Scenario.SELECTION)
    # Set to default to overwrite possible old path
    gv.latest_scenario().set_selection_test_case_directory()

    selector_path = selection_scenario_path / "portfolio_selector"
    sbatch_options = gv.settings().get_slurm_extra_options(as_args=True)
    if selector_path.exists() and not flag_recompute_portfolio:
        # Existing selector and no --recompute flag: nothing to do.
        print("Portfolio selector already exists. Set the recompute flag to re-create.")
        sys.exit()

    selector_path.parent.mkdir(exist_ok=True, parents=True)
    slurm_prepend = gv.settings().get_slurm_job_prepend()
    # Kick off the selector construction (locally or as a scheduled job).
    selector_run = selector.construct(selector_path,
                                      performance_data,
                                      feature_data,
                                      objective,
                                      cutoff_time,
                                      run_on=run_on,
                                      sbatch_options=sbatch_options,
                                      slurm_prepend=slurm_prepend,
                                      base_dir=sl.caller_log_dir)
    if run_on == Runner.LOCAL:
        print("Sparkle portfolio selector constructed!")
    else:
        print("Sparkle portfolio selector constructor running...")

    dependencies = [selector_run]
    if solver_ablation:
        # For each solver, build a selector on the data with that solver
        # removed, so its marginal contribution can be measured later.
        for solver in performance_data.solvers:
            solver_name = Path(solver).name
            ablate_solver_dir = selection_scenario_path / f"ablate_{solver_name}"
            ablate_solver_selector = ablate_solver_dir / "portfolio_selector"
            if (ablate_solver_selector.exists() and not flag_recompute_portfolio):
                print(f"Portfolio selector without {solver_name} already exists. "
                      "Set the recompute flag to re-create.")
                continue
            ablate_solver_dir.mkdir(exist_ok=True, parents=True)
            # Clone so the full performance data stays intact for other runs.
            ablated_performance_data = performance_data.clone()
            ablated_performance_data.remove_solver(solver)
            ablated_run = selector.construct(ablate_solver_selector,
                                             ablated_performance_data,
                                             feature_data,
                                             objective,
                                             cutoff_time,
                                             run_on=run_on,
                                             sbatch_options=sbatch_options,
                                             slurm_prepend=slurm_prepend,
                                             base_dir=sl.caller_log_dir)
            dependencies.append(ablated_run)
            if run_on == Runner.LOCAL:
                print(f"Portfolio selector without {solver_name} constructed!")
            else:
                print(f"Portfolio selector without {solver_name} constructor running...")

    # Compute the marginal contribution
    with_actual = "--actual" if solver_ablation else ""
    cmd = (f"python3 sparkle/CLI/compute_marginal_contribution.py --perfect "
           f"{with_actual} {ac.ObjectivesArgument.names[0]} {objective}")
    solver_names = ", ".join([Path(s).name for s in performance_data.solvers])
    # Queue after all selector construction runs have finished.
    marginal_contribution = rrr.add_to_queue(
        runner=run_on,
        cmd=cmd,
        name=f"Marginal Contribution computation: {solver_names}",
        base_dir=sl.caller_log_dir,
        dependencies=dependencies,
        sbatch_options=sbatch_options,
        prepend=gv.settings().get_slurm_job_prepend())
    dependencies.append(marginal_contribution)
    if run_on == Runner.LOCAL:
        # Local runs are synchronous: block until the computation is done.
        marginal_contribution.wait()
        print("Selector marginal contribution computing done!")
    else:
        print(f"Running selector construction. Waiting for Slurm job(s) with id(s): "
              f"{', '.join([d.run_id for d in dependencies])}")

    # Write used settings to file
    gv.settings().write_used_settings()
    # Write used scenario to file
    gv.latest_scenario().write_scenario_ini()
    sys.exit(0)
# Script entry point: forward the CLI arguments (sans program name) to main.
if __name__ == "__main__":
    main(sys.argv[1:])