Coverage for sparkle/CLI/construct_portfolio_selector.py: 0%
99 statements
« prev ^ index » next coverage.py v7.6.1, created at 2024-09-27 09:10 +0000
1#!/usr/bin/env python3
2"""Sparkle command to construct a portfolio selector."""
3import sys
4import argparse
5from pathlib import Path
7import runrunner as rrr
8from runrunner.base import Runner
10from sparkle.CLI.help import global_variables as gv
11from sparkle.structures import PerformanceDataFrame, FeatureDataFrame
12from sparkle.CLI.help import logging as sl
13from sparkle.platform.settings_objects import SettingState
14from sparkle.CLI.help import argparse_custom as ac
15from sparkle.CLI.help.reporting_scenario import Scenario
16from sparkle.platform import CommandName, COMMAND_DEPENDENCIES
17from sparkle.CLI.initialise import check_for_initialise
20def parser_function() -> argparse.ArgumentParser:
21 """Define the command line arguments."""
22 parser = argparse.ArgumentParser()
23 parser.add_argument(*ac.RecomputePortfolioSelectorArgument.names,
24 **ac.RecomputePortfolioSelectorArgument.kwargs)
25 parser.add_argument(*ac.RecomputeMarginalContributionForSelectorArgument.names,
26 **ac.RecomputeMarginalContributionForSelectorArgument.kwargs)
27 parser.add_argument(*ac.SelectorTimeoutArgument.names,
28 **ac.SelectorTimeoutArgument.kwargs)
29 parser.add_argument(*ac.SparkleObjectiveArgument.names,
30 **ac.SparkleObjectiveArgument.kwargs)
31 parser.add_argument(*ac.SelectorAblationArgument.names,
32 **ac.SelectorAblationArgument.kwargs)
33 parser.add_argument(*ac.RunOnArgument.names,
34 **ac.RunOnArgument.kwargs)
35 return parser
def judge_exist_remaining_jobs(feature_data_csv: Path,
                               performance_data_csv: Path) -> bool:
    """Check whether feature or performance computation jobs are still pending.

    Args:
        feature_data_csv: Path to the feature data file.
        performance_data_csv: Path to the performance data file.

    Returns:
        True iff the feature data has missing vectors or the performance
        data has missing values.
    """
    # Missing feature vectors alone are enough to report pending work.
    if FeatureDataFrame(feature_data_csv).has_missing_vectors():
        return True
    # NOTE: has_missing_values is accessed as a property here (no call),
    # matching the original code.
    return PerformanceDataFrame(performance_data_csv).has_missing_values
if __name__ == "__main__":
    # Log command call
    sl.log_command(sys.argv)

    # Define command line arguments
    parser = parser_function()

    # Process command line arguments
    args = parser.parse_args()
    selector_timeout = args.selector_timeout
    flag_recompute_portfolio = args.recompute_portfolio_selector
    # NOTE(review): this flag is parsed but never read below — the marginal
    # contribution job is always queued regardless. Confirm intended behaviour.
    flag_recompute_marg_cont = args.recompute_marginal_contribution
    solver_ablation = args.solver_ablation

    # Abort early if the platform has not been initialised for this command.
    check_for_initialise(
        COMMAND_DEPENDENCIES[CommandName.CONSTRUCT_PORTFOLIO_SELECTOR]
    )

    # Command-line objectives override the settings-file defaults.
    if ac.set_by_user(args, "objectives"):
        gv.settings().set_general_sparkle_objectives(
            args.objectives, SettingState.CMD_LINE
        )
    if args.run_on is not None:
        gv.settings().set_run_on(
            args.run_on.value, SettingState.CMD_LINE)
    run_on = gv.settings().get_run_on()

    print("Start constructing Sparkle portfolio selector ...")
    selector = gv.settings().get_general_sparkle_selector()

    # Refuse to build a selector from incomplete data: all feature and
    # performance computation jobs must have finished first.
    flag_judge_exist_remaining_jobs = judge_exist_remaining_jobs(
        gv.settings().DEFAULT_feature_data_path,
        gv.settings().DEFAULT_performance_data_path)

    if flag_judge_exist_remaining_jobs:
        print("There remain unperformed feature computation jobs or performance "
              "computation jobs!")
        print("Please first execute all unperformed jobs before constructing Sparkle "
              "portfolio selector")
        print("Sparkle portfolio selector is not successfully constructed!")
        sys.exit(-1)

    # Selector (AutoFolio) cannot handle cutoff time less than 2, adjust if needed
    cutoff_time = max(gv.settings().get_general_target_cutoff_time(), 2)

    # Determine the objective function (first configured objective is used).
    objective = gv.settings().get_general_sparkle_objectives()[0]

    performance_data = PerformanceDataFrame(gv.settings().DEFAULT_performance_data_path)
    feature_data = FeatureDataFrame(gv.settings().DEFAULT_feature_data_path)

    # Individual missing feature values (as opposed to whole missing vectors,
    # which abort above) are imputed with the column mean so construction can
    # proceed. NOTE(review): helper above calls has_missing_vectors() while
    # this calls has_missing_value() — confirm both exist on FeatureDataFrame.
    if feature_data.has_missing_value():
        print("WARNING: Missing values in the feature data, will be imputed as the mean "
              "value of all other non-missing values! Imputing all missing values...")
        feature_data.impute_missing_values()

    # TODO: Allow user to specify subsets of data to be used

    # Selector is named after the solvers it can predict, sort for permutation invariance
    solvers = sorted([s.name for s in gv.settings().DEFAULT_solver_dir.iterdir()])
    selection_scenario_path = (
        gv.settings().DEFAULT_selection_output
        / gv.settings().DEFAULT_general_sparkle_selector.name
        / "_".join(solvers))

    # Update latest scenario
    gv.latest_scenario().set_selection_scenario_path(selection_scenario_path)
    gv.latest_scenario().set_latest_scenario(Scenario.SELECTION)
    # Set to default to overwrite possible old path
    gv.latest_scenario().set_selection_test_case_directory()

    selector_path = selection_scenario_path / "portfolio_selector"
    sbatch_options = gv.settings().get_slurm_extra_options(as_args=True)
    # Skip rebuilding an existing selector unless --recompute was given.
    if selector_path.exists() and not flag_recompute_portfolio:
        print("Portfolio selector already exists. Set the recompute flag to re-create.")
        sys.exit()

    selector_path.parent.mkdir(exist_ok=True, parents=True)

    # Submit (or run locally) the main selector-construction job.
    selector_run = selector.construct(selector_path,
                                      performance_data,
                                      feature_data,
                                      objective,
                                      cutoff_time,
                                      selector_timeout,
                                      run_on=run_on,
                                      sbatch_options=sbatch_options,
                                      base_dir=sl.caller_log_dir)
    if run_on == Runner.LOCAL:
        print("Sparkle portfolio selector constructed!")
    else:
        print("Sparkle portfolio selector constructor running...")

    # Later jobs (marginal contribution) must wait for every constructed
    # selector, so collect all construction runs as dependencies.
    dependencies = [selector_run]
    if solver_ablation:
        # Ablation: build one selector per left-out solver to measure each
        # solver's contribution to the full portfolio.
        for solver in performance_data.solvers:
            solver_name = Path(solver).name
            ablate_solver_dir = selection_scenario_path / f"ablate_{solver_name}"
            ablate_solver_selector = ablate_solver_dir / "portfolio_selector"
            if (ablate_solver_selector.exists() and not flag_recompute_portfolio):
                print(f"Portfolio selector without {solver_name} already exists. "
                      "Set the recompute flag to re-create.")
                continue
            ablate_solver_dir.mkdir(exist_ok=True, parents=True)
            # Copy so removing the solver does not mutate the full data frame.
            ablated_performance_data = performance_data.copy()
            ablated_performance_data.remove_solver(solver)
            ablated_run = selector.construct(ablate_solver_selector,
                                             ablated_performance_data,
                                             feature_data,
                                             objective,
                                             cutoff_time,
                                             selector_timeout,
                                             run_on=run_on,
                                             sbatch_options=sbatch_options,
                                             base_dir=sl.caller_log_dir)
            dependencies.append(ablated_run)
            if run_on == Runner.LOCAL:
                print(f"Portfolio selector without {solver_name} constructed!")
            else:
                print(f"Portfolio selector without {solver_name} constructor running...")
    # Queue marginal-contribution computation after all selectors are built;
    # --actual is only meaningful when the ablated selectors exist.
    with_actual = "--actual" if solver_ablation else ""
    cmd = (f"sparkle/CLI/compute_marginal_contribution.py --perfect {with_actual} "
           f"{ac.SparkleObjectiveArgument.names[0]} {objective}")

    marginal_contribution = rrr.add_to_queue(
        runner=run_on,
        cmd=cmd,
        name=CommandName.COMPUTE_MARGINAL_CONTRIBUTION,
        base_dir=sl.caller_log_dir,
        dependencies=dependencies,
        sbatch_options=sbatch_options)
    dependencies.append(marginal_contribution)
    if run_on == Runner.LOCAL:
        # Local runs are synchronous from the user's perspective: block here.
        marginal_contribution.wait()
        print("Selector marginal contribution computing done!")
    else:
        print(f"Running selector construction. Waiting for Slurm job(s) with id(s): "
              f"{', '.join([d.run_id for d in dependencies])}")

    # Write used settings to file
    gv.settings().write_used_settings()
    # Write used scenario to file
    gv.latest_scenario().write_scenario_ini()