Coverage for sparkle/CLI/compute_marginal_contribution.py: 34%
94 statements
« prev ^ index » next coverage.py v7.6.1, created at 2024-09-27 09:10 +0000
« prev ^ index » next coverage.py v7.6.1, created at 2024-09-27 09:10 +0000
1#!/usr/bin/env python3
2"""Sparkle command for the computation of the marginal contributions."""
3import sys
4import argparse
5from pathlib import Path
6import operator
8import tabulate
10from sparkle.CLI.help import global_variables as gv
11from sparkle.CLI.help import logging as sl
12from sparkle.platform.settings_objects import SettingState
13from sparkle.CLI.help import argparse_custom as ac
14from sparkle.platform import CommandName, COMMAND_DEPENDENCIES
15from sparkle.CLI.initialise import check_for_initialise
16from sparkle.structures import PerformanceDataFrame, FeatureDataFrame
17from sparkle.types import SparkleObjective
def parser_function() -> argparse.ArgumentParser:
    """Build the argument parser for the marginal contribution command.

    Returns:
        An ArgumentParser on which the perfect selector, actual selector,
        objective and settings file arguments are registered, in that order.
    """
    # Argument containers from argparse_custom; each one carries the
    # positional names and keyword arguments for ``add_argument``.
    cli_arguments = (
        ac.PerfectSelectorMarginalContributionArgument,
        ac.ActualMarginalContributionArgument,
        ac.SparkleObjectiveArgument,
        ac.SettingsFileArgument,
    )
    parser = argparse.ArgumentParser()
    for cli_argument in cli_arguments:
        parser.add_argument(*cli_argument.names, **cli_argument.kwargs)
    return parser
def compute_selector_performance(
        actual_portfolio_selector: Path,
        performance_data: PerformanceDataFrame,
        feature_data: FeatureDataFrame,
        objective: SparkleObjective) -> float:
    """Compute the aggregated performance of a portfolio selector.

    When a "performance.csv" file already exists next to the selector, the
    "portfolio_selector" values stored in it are aggregated and returned.
    Otherwise the selector is run for every instance of the performance data,
    the resulting schedule is evaluated, and the outcome is saved to that file.

    Args:
        actual_portfolio_selector: Path to portfolio selector.
        performance_data: The performance data.
        feature_data: The feature data.
        objective: Objective to compute the performance for.

    Returns:
        The result of the objective's instance aggregator applied to the
        selector's performance values.
    """
    selector_column = "portfolio_selector"
    results_path = actual_portfolio_selector.parent / "performance.csv"

    # Reuse previously stored results when they are available
    if results_path.exists():
        stored_results = PerformanceDataFrame(results_path)
        return objective.instance_aggregator(
            stored_results.get_values(selector_column))

    # Work on a copy, extended with a column for the selector itself
    results = performance_data.copy()
    results.add_solver(selector_column)
    results.csv_filepath = results_path
    selector = gv.settings().get_general_sparkle_selector()

    # The selector's prediction on an instance's feature vector gives the
    # schedule for that instance.
    schedule = {
        instance: selector.run(actual_portfolio_selector,
                               feature_data.get_instance(instance))
        for instance in performance_data.instances
    }
    scores = results.schedule_performance(
        schedule, target_solver=selector_column, objective=objective)

    # Drop the original solvers so only the selector remains, then save to disk
    results.remove_solver(performance_data.solvers)
    results.save_csv()
    return objective.instance_aggregator(scores)
def compute_selector_marginal_contribution(
        performance_data: PerformanceDataFrame,
        feature_data: FeatureDataFrame,
        selector_scenario: Path,
        objective: SparkleObjective) -> list[tuple[str, float, float]]:
    """Compute the marginal contributions of solvers in the selector.

    For every solver, the performance of the selector built without that
    solver (found under "ablate_<solver name>" in the scenario directory) is
    compared with the performance of the full selector.

    Args:
        performance_data: Performance data object.
        feature_data: Feature data object.
        selector_scenario: Path to the selector scenario for which to compute
            marginal contribution.
        objective: Objective to compute the marginal contribution for.

    Returns:
        A list of 3-tuples of the form
        (solver, marginal contribution, ablated selector performance),
        sorted by marginal contribution in descending order. Solvers without
        an ablated selector on disk are skipped.
    """
    portfolio_selector_path = selector_scenario / "portfolio_selector"

    if not portfolio_selector_path.exists():
        print(f"ERROR: Selector {portfolio_selector_path} does not exist! "
              "Cannot compute marginal contribution.")
        sys.exit(-1)

    selector_performance = compute_selector_performance(
        portfolio_selector_path, performance_data,
        feature_data, objective)

    rank_list = []
    # compare(a, b) is True when a is a better performance than b
    compare = operator.lt if objective.minimise else operator.gt
    # Compute contribution per solver
    # NOTE: This could be parallelised
    for solver in performance_data.solvers:
        solver_name = Path(solver).name
        ablated_actual_portfolio_selector =\
            selector_scenario / f"ablate_{solver_name}" / "portfolio_selector"
        # Check for the ablated selector first, so no dataframe copy is made
        # for solvers that are skipped anyway.
        if not ablated_actual_portfolio_selector.exists():
            print(f"WARNING: Selector without {solver_name} does not exist! "
                  f"Cannot compute marginal contribution of {solver_name}.")
            continue

        # Copy the original dataframe and remove the solver from the copy
        tmp_performance_df = performance_data.copy()
        tmp_performance_df.remove_solver(solver)

        ablated_selector_performance = compute_selector_performance(
            ablated_actual_portfolio_selector, tmp_performance_df,
            feature_data, objective)

        # 1. If the performance remains equal, this solver did not contribute
        # 2. If there is a performance decay without this solver, it does contribute
        # 3. If there is a performance improvement, we have a bad portfolio selector
        if ablated_selector_performance == selector_performance:
            marginal_contribution = 0.0
        elif not compare(ablated_selector_performance, selector_performance):
            # In the case that the performance decreases, we have a contributing
            # solver; its contribution is the ratio of ablated to full performance.
            # NOTE(review): a selector_performance of 0 may raise
            # ZeroDivisionError here for plain Python floats - confirm whether
            # that case can occur.
            marginal_contribution = ablated_selector_performance / selector_performance
        else:
            print("****** WARNING DUBIOUS SELECTOR/SOLVER: "
                  f"The omission of solver {solver_name} yields an improvement. "
                  "The selector improves better without this solver. It may be usefull "
                  "to construct a portfolio without this solver.")
            marginal_contribution = 0.0

        rank_list.append((solver, marginal_contribution, ablated_selector_performance))

    # Highest marginal contribution first
    rank_list.sort(key=operator.itemgetter(1), reverse=True)
    return rank_list
def compute_marginal_contribution(
        scenario: Path,
        compute_perfect: bool, compute_actual: bool) -> None:
    """Compute and print the marginal contribution of each solver.

    The performance and feature data are loaded from the default paths in the
    settings, and the first configured objective is used. Results are printed
    as tables to standard output.

    Args:
        scenario: Path to the selector scenario for which to compute the
            marginal contribution with respect to the actual selector.
        compute_perfect: Bool indicating if the contribution for the perfect
            portfolio selector should be computed.
        compute_actual: Bool indicating if the contribution for the actual portfolio
            selector should be computed.
    """
    column_titles = ["Solver", "Marginal Contribution", "Best Performance"]

    def print_contributions(rows: list) -> None:
        """Print the contribution rows as a table followed by a newline."""
        print(tabulate.tabulate(rows, headers=column_titles), "\n")

    performance_data = PerformanceDataFrame(gv.settings().DEFAULT_performance_data_path)
    feature_data = FeatureDataFrame(gv.settings().DEFAULT_feature_data_path)
    objective = gv.settings().get_general_sparkle_objectives()[0]

    if compute_perfect:
        # Perfect selector is the computation of the best performance per instance
        print("Computing each solver's marginal contribution to perfect selector ...")
        perfect_rows = performance_data.marginal_contribution(
            objective=objective.name, sort=True)
        print_contributions(perfect_rows)
        print("Marginal contribution (perfect selector) computing done!")

    if compute_actual:
        print("Start computing marginal contribution per Solver to actual selector...")
        actual_rows = compute_selector_marginal_contribution(
            performance_data, feature_data, scenario, objective)
        print_contributions(actual_rows)
        print("Marginal contribution (actual selector) computing done!")
if __name__ == "__main__":
    # Log command call
    sl.log_command(sys.argv)

    # Define command line arguments
    parser = parser_function()

    # Process command line arguments
    args = parser.parse_args()

    check_for_initialise(
        COMMAND_DEPENDENCIES[CommandName.COMPUTE_MARGINAL_CONTRIBUTION]
    )

    if ac.set_by_user(args, "settings_file"):
        gv.settings().read_settings_ini(
            args.settings_file, SettingState.CMD_LINE
        )  # Do first, so other command line options can override settings from the file
    if ac.set_by_user(args, "objectives"):
        gv.settings().set_general_sparkle_objectives(
            args.objectives, SettingState.CMD_LINE
        )
    selection_scenario = gv.latest_scenario().get_selection_scenario_path()

    # At least one of the two computations must be requested. Logical `or`
    # is used instead of bitwise `|`, which only works for operands that
    # support that operator.
    if not (args.perfect or args.actual):
        print("ERROR: compute_marginal_contribution called without a flag set to"
              " True, stopping execution")
        sys.exit(-1)

    compute_marginal_contribution(selection_scenario, args.perfect, args.actual)

    # Write used settings to file
    gv.settings().write_used_settings()