Coverage for sparkle/CLI/compute_marginal_contribution.py: 34%

94 statements  

« prev     ^ index     » next       coverage.py v7.6.1, created at 2024-09-27 09:10 +0000

1#!/usr/bin/env python3 

2"""Sparkle command for the computation of the marginal contributions.""" 

3import sys 

4import argparse 

5from pathlib import Path 

6import operator 

7 

8import tabulate 

9 

10from sparkle.CLI.help import global_variables as gv 

11from sparkle.CLI.help import logging as sl 

12from sparkle.platform.settings_objects import SettingState 

13from sparkle.CLI.help import argparse_custom as ac 

14from sparkle.platform import CommandName, COMMAND_DEPENDENCIES 

15from sparkle.CLI.initialise import check_for_initialise 

16from sparkle.structures import PerformanceDataFrame, FeatureDataFrame 

17from sparkle.types import SparkleObjective 

18 

19 

def parser_function() -> argparse.ArgumentParser:
    """Build the argument parser of the marginal contribution command.

    Returns:
        An ArgumentParser on which the perfect selector, actual selector,
        objective and settings file arguments are registered, in that order.
    """
    cli_parser = argparse.ArgumentParser()
    # Registration order is preserved; argparse lists options in this order.
    argument_definitions = (
        ac.PerfectSelectorMarginalContributionArgument,
        ac.ActualMarginalContributionArgument,
        ac.SparkleObjectiveArgument,
        ac.SettingsFileArgument,
    )
    for definition in argument_definitions:
        cli_parser.add_argument(*definition.names, **definition.kwargs)
    return cli_parser

33 

34 

def compute_selector_performance(
        actual_portfolio_selector: Path,
        performance_data: PerformanceDataFrame,
        feature_data: FeatureDataFrame,
        objective: SparkleObjective) -> float:
    """Compute the aggregated performance of a portfolio selector.

    When a "performance.csv" file already exists in the directory of the
    selector, the values stored for "portfolio_selector" are aggregated and
    returned. Otherwise the selector is run on the feature vector of every
    instance in the performance data, the resulting schedule is evaluated,
    and the outcome is written to that "performance.csv" file.

    Args:
        actual_portfolio_selector: Path to the portfolio selector.
        performance_data: The performance data of the solvers.
        feature_data: The feature data of the instances.
        objective: Objective to compute the performance for.

    Returns:
        The result of the objective's instance aggregator over the selector's
        per-instance performance.
    """
    results_path = actual_portfolio_selector.parent / "performance.csv"
    if results_path.exists():
        # Results are already on disk: aggregate them instead of recomputing
        stored_results = PerformanceDataFrame(results_path)
        stored_values = stored_results.get_values("portfolio_selector")
        return objective.instance_aggregator(stored_values)

    # Work on a copy, so the caller's performance data stays untouched
    results = performance_data.copy()
    results.add_solver("portfolio_selector")
    results.csv_filepath = results_path
    selector = gv.settings().get_general_sparkle_selector()

    # The selector's prediction on an instance's feature vector is the
    # schedule that is evaluated for that instance.
    schedule = {
        instance: selector.run(actual_portfolio_selector,
                               feature_data.get_instance(instance))
        for instance in performance_data.instances
    }
    per_instance_performance = results.schedule_performance(
        schedule, target_solver="portfolio_selector", objective=objective)

    # Drop the original solvers before saving the results to disk
    results.remove_solver(performance_data.solvers)
    results.save_csv()
    return objective.instance_aggregator(per_instance_performance)

75 

76 

def compute_selector_marginal_contribution(
        performance_data: PerformanceDataFrame,
        feature_data: FeatureDataFrame,
        selector_scenario: Path,
        objective: SparkleObjective) -> list[tuple[str, float, float]]:
    """Compute the marginal contributions of solvers in the selector.

    For every solver, the performance of the selector constructed without that
    solver (expected at "<selector_scenario>/ablate_<solver name>/
    portfolio_selector") is compared to the performance of the full selector.
    Solvers for which no such ablated selector exists are skipped with a
    warning. The program exits with code -1 if the full selector is missing.

    Args:
        performance_data: Performance data object.
        feature_data: Feature data object.
        selector_scenario: Path to the selector scenario for which to compute
            marginal contribution.
        objective: Objective to compute the marginal contribution for.

    Returns:
        A list of 3-tuples of the form
        (solver, marginal contribution, performance of the selector without
        this solver), sorted by marginal contribution in descending order.
    """
    portfolio_selector_path = selector_scenario / "portfolio_selector"

    if not portfolio_selector_path.exists():
        print(f"ERROR: Selector {portfolio_selector_path} does not exist! "
              "Cannot compute marginal contribution.")
        sys.exit(-1)

    selector_performance = compute_selector_performance(
        portfolio_selector_path, performance_data,
        feature_data, objective)

    rank_list = []
    # compare(a, b) is True when a is strictly better than b for this objective
    compare = operator.lt if objective.minimise else operator.gt
    # Compute contribution per solver
    # NOTE: This could be parallelised
    for solver in performance_data.solvers:
        solver_name = Path(solver).name
        ablated_actual_portfolio_selector =\
            selector_scenario / f"ablate_{solver_name}" / "portfolio_selector"
        # Check first, so no dataframe is copied for a solver that is skipped
        if not ablated_actual_portfolio_selector.exists():
            print(f"WARNING: Selector without {solver_name} does not exist! "
                  f"Cannot compute marginal contribution of {solver_name}.")
            continue

        # Evaluate the ablated selector on a copy of the data without the solver
        tmp_performance_df = performance_data.copy()
        tmp_performance_df.remove_solver(solver)
        ablated_selector_performance = compute_selector_performance(
            ablated_actual_portfolio_selector, tmp_performance_df,
            feature_data, objective)

        # 1. If the performance remains equal, this solver did not contribute
        # 2. If there is a performance improvement, we have a bad portfolio selector
        # 3. If there is a performance decay without this solver, it does contribute
        if ablated_selector_performance == selector_performance:
            marginal_contribution = 0.0
        elif compare(ablated_selector_performance, selector_performance):
            print("****** WARNING DUBIOUS SELECTOR/SOLVER: "
                  f"The omission of solver {solver_name} yields an improvement. "
                  "The selector improves better without this solver. It may be usefull "
                  "to construct a portfolio without this solver.")
            marginal_contribution = 0.0
        elif selector_performance == 0:
            # The ratio is undefined for a zero baseline. Report a signed
            # infinity (as IEEE-754 division would) rather than crashing with
            # a ZeroDivisionError. The ablated performance is non-zero here,
            # as equality was handled above.
            marginal_contribution =\
                float("inf") if ablated_selector_performance > 0 else float("-inf")
        else:
            # In the case that the performance decreases, we have a contributing solver
            marginal_contribution = ablated_selector_performance / selector_performance

        rank_list.append((solver, marginal_contribution, ablated_selector_performance))

    rank_list.sort(key=operator.itemgetter(1), reverse=True)
    return rank_list

146 

147 

def compute_marginal_contribution(
        scenario: Path,
        compute_perfect: bool, compute_actual: bool) -> None:
    """Compute and print the marginal contribution of each solver.

    The results are printed to stdout as a table, once per requested selector.

    Args:
        scenario: Path to the selector scenario for which to compute
            the marginal contribution.
        compute_perfect: Whether to compute the contribution to the perfect
            portfolio selector.
        compute_actual: Whether to compute the contribution to the actual
            portfolio selector.
    """
    performance_data = PerformanceDataFrame(gv.settings().DEFAULT_performance_data_path)
    feature_data = FeatureDataFrame(gv.settings().DEFAULT_feature_data_path)
    # Only the first of the configured objectives is used
    objective = gv.settings().get_general_sparkle_objectives()[0]

    def print_contribution_table(rows: list) -> None:
        """Print the contribution rows as a table followed by a newline."""
        rendered = tabulate.tabulate(
            rows,
            headers=["Solver", "Marginal Contribution", "Best Performance"],)
        print(rendered, "\n")

    if compute_perfect:
        # Perfect selector is the computation of the best performance per instance
        print("Computing each solver's marginal contribution to perfect selector ...")
        perfect_contributions = performance_data.marginal_contribution(
            objective=objective.name, sort=True)
        print_contribution_table(perfect_contributions)
        print("Marginal contribution (perfect selector) computing done!")

    if compute_actual:
        print("Start computing marginal contribution per Solver to actual selector...")
        actual_contributions = compute_selector_marginal_contribution(
            performance_data, feature_data, scenario, objective)
        print_contribution_table(actual_contributions)
        print("Marginal contribution (actual selector) computing done!")

188 

189 

if __name__ == "__main__":
    # Log command call
    sl.log_command(sys.argv)

    # Define and process the command line arguments
    parser = parser_function()
    args = parser.parse_args()

    check_for_initialise(
        COMMAND_DEPENDENCIES[CommandName.COMPUTE_MARGINAL_CONTRIBUTION]
    )

    # Do the settings file first, so other command line options can override
    # the settings read from the file
    if ac.set_by_user(args, "settings_file"):
        gv.settings().read_settings_ini(
            args.settings_file, SettingState.CMD_LINE
        )
    if ac.set_by_user(args, "objectives"):
        gv.settings().set_general_sparkle_objectives(
            args.objectives, SettingState.CMD_LINE
        )
    selection_scenario = gv.latest_scenario().get_selection_scenario_path()

    # Logical `or` instead of bitwise `|`: the flags are truth values, and
    # `or` also copes with a flag that is None instead of raising TypeError.
    if not (args.perfect or args.actual):
        print("ERROR: compute_marginal_contribution called without a flag set to"
              " True, stopping execution")
        sys.exit(-1)

    compute_marginal_contribution(selection_scenario, args.perfect, args.actual)

    # Write used settings to file
    gv.settings().write_used_settings()