Coverage for sparkle/platform/generate_report_for_selection.py: 0%

74 statements  

« prev     ^ index     » next       coverage.py v7.6.1, created at 2024-09-27 09:10 +0000

1#!/usr/bin/env python3 

2# -*- coding: UTF-8 -*- 

3"""Helper functions for selection report generation.""" 

4import sys 

5from pathlib import Path 

6from collections import Counter 

7 

8from sparkle.CLI.compute_marginal_contribution\ 

9 import compute_selector_marginal_contribution 

10 

11from sparkle.platform import latex as stex 

12from sparkle.structures import PerformanceDataFrame, FeatureDataFrame 

13from sparkle.types.objective import SparkleObjective 

14 

15 

def get_num_instance_sets(instance_list: list[str]) -> int:
    """Get the number of distinct instance sets.

    An instance belongs to the set named after its parent directory.

    Args:
        instance_list: List of instance paths to use.

    Returns:
        The number of distinct instance sets as int.
    """
    # A set comprehension deduplicates the parent-directory names directly,
    # without building an intermediate list first.
    return len({Path(instance_path).parent.name
                for instance_path in instance_list})

27 

28 

def get_instance_set_count_list(instance_list: list[str] = None) -> str:
    """Get the instance sets and their sizes for use in a LaTeX document.

    Args:
        instance_list: List of instance paths; each instance's set is
            identified by its parent directory name.

    Returns:
        The list of instance sets as LaTeX str.
    """
    set_names = [Path(instance_path).parent.name for instance_path in instance_list]
    count = Counter(set_names)
    # NOTE: fixed typo in the rendered text: "constisting" -> "consisting".
    rows = [(set_name, f", consisting of {count[set_name]} instances")
            for set_name in count]
    return stex.list_to_latex(rows)

40 

41 

def solver_ranked_latex_list(solver_ranking: list[tuple[str, float]],
                             objective: SparkleObjective = None) -> str:
    """Convert a list of the solvers ranked by performance to LaTeX.

    Args:
        solver_ranking: Rows whose first element is the solver name and
            whose second element is its performance value.
        objective: Optional objective whose name prefixes each value.

    Returns:
        The list of solvers ranked as LaTeX str.
    """
    if objective is None:
        prefix = ""
    else:
        prefix = f"{objective}: "
    rows = []
    for row in solver_ranking:
        rows.append((row[0], f", {prefix} {row[1]}"))
    return stex.list_to_latex(rows)

52 

53 

def get_portfolio_selector_performance(selection_scenario: Path) -> PerformanceDataFrame:
    """Load the portfolio selector's performance data for a selection scenario.

    Args:
        selection_scenario: Path to the selection scenario directory,
            expected to contain a ``performance.csv``.

    Returns:
        A PerformanceDataFrame with the selector's performance per instance.
    """
    portfolio_selector_performance_path = selection_scenario / "performance.csv"
    if not portfolio_selector_performance_path.exists():
        # Exit rather than raise: this report generator is driven from the CLI.
        print(f"ERROR: {portfolio_selector_performance_path} does not exist.")
        sys.exit(-1)
    return PerformanceDataFrame(portfolio_selector_performance_path)

65 

66 

def get_figure_portfolio_selector_vs_sbs(
        output_dir: Path,
        objective: SparkleObjective,
        train_data: PerformanceDataFrame,
        portfolio_selector_performance: PerformanceDataFrame,
        sbs_solver: str) -> str:
    """Create a LaTeX plot comparing the selector and the SBS.

    The plot compares the performance on each instance of the portfolio selector
    created by Sparkle and the SBS (single best solver).

    Returns:
        LaTeX str to include the comparison plot in a LaTeX report.
    """
    # Each point has the form (SBS performance, portfolio selector performance).
    selector = portfolio_selector_performance.solvers[0]
    points = []
    for instance in portfolio_selector_performance.instances:
        sbs_value = train_data.get_value(sbs_solver, instance, objective.name)
        selector_value = portfolio_selector_performance.get_value(selector,
                                                                  instance,
                                                                  objective.name)
        points.append([sbs_value, selector_value])

    figure_filename = "figure_portfolio_selector_sparkle_vs_sbs"
    sbs_solver_name = Path(sbs_solver).name

    stex.generate_comparison_plot(points,
                                  figure_filename,
                                  xlabel=f"SBS ({sbs_solver_name}) [{objective}]",
                                  ylabel=f"Sparkle Selector [{objective}]",
                                  limit="magnitude",
                                  limit_min=0.25,
                                  limit_max=0.25,
                                  replace_zeros=True,
                                  output_dir=output_dir)
    return f"\\includegraphics[width=0.6\\textwidth]{{{figure_filename}}}"

102 

103 

def get_figure_portfolio_selector_sparkle_vs_vbs(
        output_dir: Path,
        objective: SparkleObjective,
        train_data: PerformanceDataFrame,
        actual_portfolio_selector_penalty: PerformanceDataFrame) -> str:
    """Create a LaTeX plot comparing the selector and the VBS.

    The plot compares the performance on each instance of the portfolio selector
    created by Sparkle and the VBS (virtual best solver).

    Args:
        output_dir: Directory the figure is written to.
        objective: The objective the performance values belong to.
        train_data: Performance data of the individual solvers.
        actual_portfolio_selector_penalty: Selector performance per instance.

    Returns:
        LaTeX str to include the comparison plot in a LaTeX report.
    """
    # Each point has the form (VBS performance, portfolio selector performance).
    vbs_performance = train_data.best_instance_performance(objective=objective.name)
    instances = actual_portfolio_selector_penalty.instances
    solver = actual_portfolio_selector_penalty.solvers[0]
    points = [(vbs_performance[instance],
               actual_portfolio_selector_penalty.get_value(solver,
                                                           instance,
                                                           objective.name))
              for instance in instances]

    figure_filename = "figure_portfolio_selector_sparkle_vs_vbs"

    stex.generate_comparison_plot(points,
                                  figure_filename,
                                  xlabel=f"VBS [{objective}]",
                                  # Consistency fix: use {objective} like the
                                  # xlabel and the SBS plot, not {objective.name}.
                                  ylabel=f"Sparkle Selector [{objective}]",
                                  limit="magnitude",
                                  limit_min=0.25,
                                  limit_max=0.25,
                                  replace_zeros=True,
                                  output_dir=output_dir)
    return f"\\includegraphics[width=0.6\\textwidth]{{{figure_filename}}}"

138 

139 

def selection_report_variables(
        target_dir: Path,
        bibliograpghy_path: Path,  # [sic] parameter name kept for compatibility
        extractor_path: Path,
        selection_scenario: Path,
        performance_data: PerformanceDataFrame,
        feature_data: FeatureDataFrame,
        objective: SparkleObjective,
        extractor_cutoff: int,
        cutoff: int,
        test_case_data: PerformanceDataFrame = None) -> dict[str, str]:
    """Returns: a dict matching variables in the LaTeX template with their values.

    Args:
        target_dir: Output path for generated figures.
        bibliograpghy_path: Path to the bib file.
        extractor_path: Path to the directory of feature extractors.
        selection_scenario: Path to the selection scenario directory.
        performance_data: Solver performance data (training set).
        feature_data: Feature data created by the extractors.
        objective: The objective the selector was constructed for.
        extractor_cutoff: Cutoff time for feature computation.
        cutoff: Cutoff time per solver run.
        test_case_data: Performance data for the test set. Defaults to None.

    Returns:
        A dict matching str variables in the LaTeX template with their value str.
    """
    actual_performance_data = get_portfolio_selector_performance(selection_scenario)
    solver_performance_ranking = performance_data.get_solver_ranking(
        objective=objective)
    single_best_solver = solver_performance_ranking[0][0]
    latex_dict = {"bibliographypath": bibliograpghy_path.absolute(),
                  "numSolvers": performance_data.num_solvers,
                  "solverList": stex.list_to_latex([(s, "")
                                                    for s in performance_data.solvers])}
    latex_dict["numFeatureExtractors"] = len(
        [p for p in extractor_path.iterdir() if p.is_dir()])
    # BUG FIX: removed a duplicated stex.list_to_latex(...) call whose result
    # was discarded.
    latex_dict["featureExtractorList"] = stex.list_to_latex(
        [(f, "") for f in extractor_path.iterdir()])
    latex_dict["numInstanceClasses"] = get_num_instance_sets(performance_data.instances)
    latex_dict["instanceClassList"] =\
        get_instance_set_count_list(performance_data.instances)
    latex_dict["featureComputationCutoffTime"] = extractor_cutoff
    latex_dict["performanceComputationCutoffTime"] = cutoff
    rank_list_perfect = performance_data.marginal_contribution(objective, sort=True)
    rank_list_actual = compute_selector_marginal_contribution(performance_data,
                                                              feature_data,
                                                              selection_scenario,
                                                              objective)
    latex_dict["solverPerfectRankingList"] = solver_ranked_latex_list(rank_list_perfect)
    latex_dict["solverActualRankingList"] = solver_ranked_latex_list(rank_list_actual)
    latex_dict["PARRankingList"] = solver_ranked_latex_list(solver_performance_ranking,
                                                            objective)
    latex_dict["VBSPAR"] = objective.instance_aggregator(
        performance_data.best_instance_performance(objective=objective.name))
    latex_dict["actualPAR"] = actual_performance_data.mean(objective=objective.name)
    latex_dict["metric"] = objective.name
    latex_dict["figure-portfolio-selector-sparkle-vs-sbs"] =\
        get_figure_portfolio_selector_vs_sbs(
            target_dir, objective, performance_data,
            actual_performance_data, single_best_solver)
    latex_dict["figure-portfolio-selector-sparkle-vs-vbs"] =\
        get_figure_portfolio_selector_sparkle_vs_vbs(target_dir,
                                                     objective,
                                                     performance_data,
                                                     actual_performance_data)
    latex_dict["testBool"] = r"\testfalse"

    # Train and test
    if test_case_data is not None:
        # BUG FIX: the original f-string "\\textbf{ {name} }" parsed the inner
        # braces as a one-element set literal, rendering e.g. \textbf{'name'}
        # (with quotes). Triple braces emit literal braces around the value.
        latex_dict["testInstanceClass"] =\
            f"\\textbf{{{test_case_data.csv_filepath.parent.name}}}"
        latex_dict["numInstanceInTestInstanceClass"] =\
            test_case_data.num_instances
        latex_dict["testActualPAR"] = test_case_data.mean(objective=objective.name)
        latex_dict["testBool"] = r"\testtrue"

    return latex_dict

213 

214 

def generate_report_selection(target_path: Path,
                              latex_dir: Path,
                              latex_template: Path,
                              bibliography_path: Path,
                              extractor_path: Path,
                              selection_scenario: Path,
                              feature_data: FeatureDataFrame,
                              train_data: PerformanceDataFrame,
                              objective: SparkleObjective,
                              extractor_cutoff: int,
                              cutoff: int,
                              test_case_data: PerformanceDataFrame = None) -> None:
    """Generate a report for algorithm selection.

    Args:
        target_path: Path where the outputfiles will be placed.
        latex_dir: The latex dir
        latex_template: The template for the report
        bibliography_path: Path to the bib file.
        extractor_path: Path to the extractor used
        selection_scenario: Path to the selector scenario
        feature_data: Feature data created by extractor
        train_data: The performance input data for the selector
        objective: The objective for the selector
        extractor_cutoff: The maximum time for the selector to run
        cutoff: The cutoff per solver
        test_case_data: Path to the test case directory. Defaults to None.
    """
    # A distinct report name marks reports that include test-set results.
    if test_case_data is not None:
        latex_report_filename = Path("Sparkle_Report_for_Test")
    else:
        latex_report_filename = Path("Sparkle_Report")

    target_path.mkdir(parents=True, exist_ok=True)
    variables = selection_report_variables(target_path,
                                           bibliography_path,
                                           extractor_path,
                                           selection_scenario,
                                           train_data,
                                           feature_data,
                                           objective,
                                           extractor_cutoff,
                                           cutoff,
                                           test_case_data)
    stex.generate_report(latex_dir,
                         latex_template,
                         target_path,
                         latex_report_filename,
                         variables)