Coverage for sparkle/CLI/construct_portfolio_selector.py: 72%
107 statements
« prev ^ index » next coverage.py v7.6.10, created at 2025-01-07 15:22 +0000
1#!/usr/bin/env python3
2"""Sparkle command to construct a portfolio selector."""
3import sys
4import argparse
5from pathlib import Path
7import runrunner as rrr
8from runrunner.base import Runner
10from sparkle.platform.settings_objects import SettingState
11from sparkle.structures import PerformanceDataFrame, FeatureDataFrame
12from sparkle.types import resolve_objective
13from sparkle.CLI.help import global_variables as gv
14from sparkle.CLI.help import logging as sl
15from sparkle.CLI.help import argparse_custom as ac
16from sparkle.CLI.help.reporting_scenario import Scenario
17from sparkle.CLI.initialise import check_for_initialise
def parser_function() -> argparse.ArgumentParser:
    """Build and return the argument parser for this command."""
    parser = argparse.ArgumentParser(
        description="Command to construct a portfolio selector over all known features "
                    "solver performances.")
    # All arguments share the same registration shape, so add them in one pass.
    cli_arguments = (ac.RecomputePortfolioSelectorArgument,
                     ac.SelectorTimeoutArgument,
                     ac.ObjectiveArgument,
                     ac.SelectorAblationArgument,
                     ac.RunOnArgument,
                     ac.SettingsFileArgument)
    for argument in cli_arguments:
        parser.add_argument(*argument.names, **argument.kwargs)
    return parser
def judge_exist_remaining_jobs(feature_data_csv: Path,
                               performance_data_csv: Path) -> None:
    """Abort if feature or performance computation jobs remain.

    Loads the feature and performance data files and checks both for
    missing entries. If either has unperformed jobs, diagnostics are
    printed and the process terminates with exit code -1; otherwise the
    function returns normally (returning None).

    NOTE: the original signature was annotated ``-> bool`` but no code
    path ever returned a value; the annotation is corrected to ``None``.

    Args:
        feature_data_csv: Path to the feature data file.
        performance_data_csv: Path to the performance data file.
    """
    feature_data = FeatureDataFrame(feature_data_csv)
    performance_data = PerformanceDataFrame(performance_data_csv)
    # NOTE(review): method vs attribute access (has_missing_vectors() /
    # has_missing_values) mirrors the respective class APIs — confirm both.
    missing_features = feature_data.has_missing_vectors()
    missing_performances = performance_data.has_missing_values
    if missing_features:
        print("There remain unperformed feature computation jobs!")
    if missing_performances:
        print("There remain unperformed performance computation jobs!")
    if missing_features or missing_performances:
        print("Please first execute all unperformed jobs before constructing Sparkle "
              "portfolio selector")
        print("Sparkle portfolio selector is not successfully constructed!")
        sys.exit(-1)
def main(argv: list[str]) -> None:
    """Main method of construct portfolio selector.

    Constructs a portfolio selector from all known solver performances and
    instance features, optionally constructs per-solver ablation selectors,
    then queues a marginal-contribution computation depending on all
    construction runs. Always terminates the process via ``sys.exit``.

    Args:
        argv: Command line arguments (excluding the program name).
    """
    # Log command call
    sl.log_command(sys.argv)
    check_for_initialise()

    # Define command line arguments
    parser = parser_function()

    # Process command line arguments
    args = parser.parse_args(argv)
    selector_timeout = args.selector_timeout
    flag_recompute_portfolio = args.recompute_portfolio_selector
    solver_ablation = args.solver_ablation

    if ac.set_by_user(args, "settings_file"):
        gv.settings().read_settings_ini(
            args.settings_file, SettingState.CMD_LINE
        )  # Do first, so other command line options can override settings from the file
    if ac.set_by_user(args, "objective"):
        objective = resolve_objective(args.objective)
    else:
        # No explicit objective: fall back to the first configured objective.
        objective = gv.settings().get_general_sparkle_objectives()[0]
        print("WARNING: No objective specified, defaulting to first objective from "
              f"settings ({objective}).")
    if args.run_on is not None:
        gv.settings().set_run_on(
            args.run_on.value, SettingState.CMD_LINE)
    run_on = gv.settings().get_run_on()

    print("Start constructing Sparkle portfolio selector ...")
    selector = gv.settings().get_general_sparkle_selector()

    # Exits the process (-1) if any feature/performance jobs are unfinished.
    judge_exist_remaining_jobs(
        gv.settings().DEFAULT_feature_data_path,
        gv.settings().DEFAULT_performance_data_path)

    # Selector (AutoFolio) cannot handle cutoff time less than 2, adjust if needed
    cutoff_time = max(gv.settings().get_general_target_cutoff_time(), 2)

    performance_data = PerformanceDataFrame(gv.settings().DEFAULT_performance_data_path)
    feature_data = FeatureDataFrame(gv.settings().DEFAULT_feature_data_path)

    # NOTE(review): `has_missing_value()` here vs `has_missing_vectors()` in
    # judge_exist_remaining_jobs — presumably distinct FeatureDataFrame
    # methods; confirm both exist.
    if feature_data.has_missing_value():
        print("WARNING: Missing values in the feature data, will be imputed as the mean "
              "value of all other non-missing values! Imputing all missing values...")
        feature_data.impute_missing_values()

    # TODO: Allow user to specify subsets of data to be used

    # Selector is named after the solvers it can predict, sort for permutation invariance
    solvers = sorted([s.name for s in gv.settings().DEFAULT_solver_dir.iterdir()])
    selection_scenario_path = (
        gv.settings().DEFAULT_selection_output
        / gv.settings().DEFAULT_general_sparkle_selector.name
        / "_".join(solvers))

    # Update latest scenario
    gv.latest_scenario().set_selection_scenario_path(selection_scenario_path)
    gv.latest_scenario().set_latest_scenario(Scenario.SELECTION)
    # Set to default to overwrite possible old path
    gv.latest_scenario().set_selection_test_case_directory()

    selector_path = selection_scenario_path / "portfolio_selector"
    sbatch_options = gv.settings().get_slurm_extra_options(as_args=True)
    # Skip (re)construction unless the user explicitly asked for a recompute.
    if selector_path.exists() and not flag_recompute_portfolio:
        print("Portfolio selector already exists. Set the recompute flag to re-create.")
        sys.exit()

    selector_path.parent.mkdir(exist_ok=True, parents=True)

    # Kick off selector construction; may run locally or be queued (e.g. Slurm).
    selector_run = selector.construct(selector_path,
                                      performance_data,
                                      feature_data,
                                      objective,
                                      cutoff_time,
                                      selector_timeout,
                                      run_on=run_on,
                                      sbatch_options=sbatch_options,
                                      base_dir=sl.caller_log_dir)
    if run_on == Runner.LOCAL:
        print("Sparkle portfolio selector constructed!")
    else:
        print("Sparkle portfolio selector constructor running...")

    # Collect runs the marginal-contribution job must wait for.
    dependencies = [selector_run]
    if solver_ablation:
        # Build one "ablated" selector per solver, each trained on the
        # performance data with that solver removed.
        for solver in performance_data.solvers:
            solver_name = Path(solver).name
            ablate_solver_dir = selection_scenario_path / f"ablate_{solver_name}"
            ablate_solver_selector = ablate_solver_dir / "portfolio_selector"
            if (ablate_solver_selector.exists() and not flag_recompute_portfolio):
                print(f"Portfolio selector without {solver_name} already exists. "
                      "Set the recompute flag to re-create.")
                continue
            ablate_solver_dir.mkdir(exist_ok=True, parents=True)
            # Clone so the removal does not affect the full performance data.
            ablated_performance_data = performance_data.clone()
            ablated_performance_data.remove_solver(solver)
            ablated_run = selector.construct(ablate_solver_selector,
                                             ablated_performance_data,
                                             feature_data,
                                             objective,
                                             cutoff_time,
                                             selector_timeout,
                                             run_on=run_on,
                                             sbatch_options=sbatch_options,
                                             base_dir=sl.caller_log_dir)
            dependencies.append(ablated_run)
            if run_on == Runner.LOCAL:
                print(f"Portfolio selector without {solver_name} constructed!")
            else:
                print(f"Portfolio selector without {solver_name} constructor running...")

    # Compute the marginal contribution
    with_actual = "--actual" if solver_ablation else ""
    # NOTE(review): uses ObjectivesArgument (plural) while the parser above
    # registers ObjectiveArgument (singular) — presumably the downstream
    # command's flag name; confirm against compute_marginal_contribution.
    cmd = (f"python3 sparkle/CLI/compute_marginal_contribution.py --perfect "
           f"{with_actual} {ac.ObjectivesArgument.names[0]} {objective}")
    solver_names = ", ".join([Path(s).name for s in performance_data.solvers])
    marginal_contribution = rrr.add_to_queue(
        runner=run_on,
        cmd=cmd,
        name=f"Marginal Contribution computation: {solver_names}",
        base_dir=sl.caller_log_dir,
        dependencies=dependencies,
        sbatch_options=sbatch_options)
    dependencies.append(marginal_contribution)
    if run_on == Runner.LOCAL:
        # Local runs block until the computation finishes.
        marginal_contribution.wait()
        print("Selector marginal contribution computing done!")
    else:
        print(f"Running selector construction. Waiting for Slurm job(s) with id(s): "
              f"{', '.join([d.run_id for d in dependencies])}")

    # Write used settings to file
    gv.settings().write_used_settings()
    # Write used scenario to file
    gv.latest_scenario().write_scenario_ini()
    sys.exit(0)
if __name__ == "__main__":
    # Script entry point: forward CLI arguments (minus the program name).
    main(sys.argv[1:])