Coverage for sparkle/platform/generate_report_for_selection.py: 97%
74 statements
« prev ^ index » next coverage.py v7.6.10, created at 2025-01-07 15:22 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2025-01-07 15:22 +0000
1#!/usr/bin/env python3
2# -*- coding: UTF-8 -*-
3"""Helper functions for selection report generation."""
4import sys
5from pathlib import Path
6from collections import Counter
8from sparkle.CLI.compute_marginal_contribution\
9 import compute_selector_marginal_contribution
11from sparkle.platform import latex as stex
12from sparkle.structures import PerformanceDataFrame, FeatureDataFrame
13from sparkle.types.objective import SparkleObjective
def get_num_instance_sets(instance_list: list[str]) -> int:
    """Count the distinct instance sets among the given instances.

    An instance set is identified by the name of the directory that directly
    contains the instance.

    Args:
        instance_list: Paths of the instances to inspect.

    Returns:
        The number of distinct parent directory names.
    """
    set_names = set()
    for instance_path in instance_list:
        set_names.add(Path(instance_path).parent.name)
    return len(set_names)
def get_instance_set_count_list(instance_list: list[str] = None) -> str:
    """Get the instance sets and their sizes for use in a LaTeX document.

    Instances are grouped by the name of the directory that directly contains
    them; each group becomes one row of the resulting list.

    Args:
        instance_list: Paths of the instances. None is treated as an empty list.

    Returns:
        The list of instance sets, each with its instance count, as LaTeX str.
    """
    if instance_list is None:
        # The signature advertises None as default, so do not crash iterating it
        instance_list = []
    count = Counter(Path(instance_path).parent.name
                    for instance_path in instance_list)
    rows = [(set_name, f", consisting of {num_instances} instances")
            for set_name, num_instances in count.items()]
    return stex.list_to_latex(rows)
def solver_ranked_latex_list(solver_ranking: list[tuple[str, float]],
                             objective: SparkleObjective = None) -> str:
    """Render a ranking of solvers as a LaTeX list.

    Args:
        solver_ranking: The ranked rows. The first element of each row is used
            as the solver and the second element as its value.
        objective: If given, its string form is placed in front of each value.

    Returns:
        The ranked solvers as LaTeX str.
    """
    if objective is None:
        prefix = ""
    else:
        prefix = f"{objective}: "
    rows = []
    for row in solver_ranking:
        # Index access on purpose: only the first two elements of a row are used
        rows.append((row[0], f", {prefix} {row[1]}"))
    return stex.list_to_latex(rows)
def get_portfolio_selector_performance(selection_scenario: Path) -> PerformanceDataFrame:
    """Load the performance data of the portfolio selector for a scenario.

    Args:
        selection_scenario: Directory of the selection scenario. The data is read
            from the file "performance.csv" inside this directory.

    Returns:
        A PerformanceDataFrame constructed from the scenario's performance file.
        If that file does not exist, an error is printed and the program exits
        with code -1.
    """
    performance_csv = selection_scenario / "performance.csv"
    if performance_csv.exists():
        return PerformanceDataFrame(performance_csv)
    # Without the selector's performance file no report can be generated
    print(f"ERROR: {performance_csv} does not exist.")
    sys.exit(-1)
def get_figure_portfolio_selector_vs_sbs(
        output_dir: Path,
        objective: SparkleObjective,
        train_data: PerformanceDataFrame,
        portfolio_selector_performance: PerformanceDataFrame,
        sbs_solver: str) -> str:
    """Plot the portfolio selector against the SBS (single best solver).

    For every instance of the selector's performance data one point is created,
    holding the SBS performance and the selector performance on that instance.

    Args:
        output_dir: Directory handed to the plot generation as output directory.
        objective: The objective whose values are compared.
        train_data: Performance data from which the SBS values are read.
        portfolio_selector_performance: Performance data of the selector. Its
            first solver is used as the selector.
        sbs_solver: The single best solver, as known in train_data.

    Returns:
        LaTeX str to include the comparison plot in a LaTeX report.
    """
    selector_name = portfolio_selector_performance.solvers[0]
    # Each point has the form [SBS performance, selector performance]
    points = []
    for instance in portfolio_selector_performance.instances:
        sbs_value = float(
            train_data.get_value(sbs_solver, instance, objective.name))
        selector_value = float(
            portfolio_selector_performance.get_value(selector_name,
                                                     instance,
                                                     objective.name))
        points.append([sbs_value, selector_value])

    plot_name = "figure_portfolio_selector_sparkle_vs_sbs"
    # Only the last path component of the solver is shown on the axis
    sbs_label = Path(sbs_solver).name
    stex.generate_comparison_plot(
        points,
        plot_name,
        xlabel=f"SBS ({sbs_label}) [{objective}]",
        ylabel=f"Sparkle Selector [{objective}]",
        limit="magnitude",
        limit_min=0.25,
        limit_max=0.25,
        replace_zeros=True,
        output_dir=output_dir)
    return f"\\includegraphics[width=0.6\\textwidth]{{{plot_name}}}"
def get_figure_portfolio_selector_sparkle_vs_vbs(
        output_dir: Path,
        objective: SparkleObjective,
        train_data: PerformanceDataFrame,
        actual_portfolio_selector_penalty: PerformanceDataFrame) -> str:
    """Plot the portfolio selector against the VBS (virtual best solver).

    For every instance of the selector's performance data one point is created,
    holding the best performance on that instance according to train_data and
    the selector performance on that instance.

    Args:
        output_dir: Directory handed to the plot generation as output directory.
        objective: The objective whose values are compared.
        train_data: Performance data from which the VBS values are computed.
        actual_portfolio_selector_penalty: Performance data of the selector. Its
            first solver is used as the selector.

    Returns:
        LaTeX str to include the comparison plot in a LaTeX report.
    """
    best_per_instance = train_data.best_instance_performance(
        objective=objective.name)
    selector_instances = actual_portfolio_selector_penalty.instances
    selector_name = actual_portfolio_selector_penalty.solvers[0]
    # Each point has the form (VBS performance, selector performance)
    points = []
    for instance in selector_instances:
        selector_value = actual_portfolio_selector_penalty.get_value(
            selector_name, instance, objective.name)
        points.append((best_per_instance[instance], selector_value))

    plot_name = "figure_portfolio_selector_sparkle_vs_vbs"
    stex.generate_comparison_plot(
        points,
        plot_name,
        xlabel=f"VBS [{objective}]",
        ylabel=f"Sparkle Selector [{objective.name}]",
        limit="magnitude",
        limit_min=0.25,
        limit_max=0.25,
        replace_zeros=True,
        output_dir=output_dir)
    return f"\\includegraphics[width=0.6\\textwidth]{{{plot_name}}}"
def selection_report_variables(
        target_dir: Path,
        bibliograpghy_path: Path,
        extractor_path: Path,
        selection_scenario: Path,
        performance_data: PerformanceDataFrame,
        feature_data: FeatureDataFrame,
        objective: SparkleObjective,
        extractor_cutoff: int,
        cutoff: int,
        test_case_data: PerformanceDataFrame = None) -> dict[str, str]:
    """Collect the values for the variables of the selection report template.

    Args:
        target_dir: Directory passed to the plot helpers as output directory.
        bibliograpghy_path: Path to the bib file. The parameter name is
            misspelled; it is kept as is for backward compatibility.
        extractor_path: Directory whose entries are listed as feature extractors.
        selection_scenario: Directory of the selection scenario, from which the
            selector's performance data is loaded.
        performance_data: Performance data of the solvers.
        feature_data: Feature data, passed on to the computation of the
            selector's marginal contribution.
        objective: The objective to report on.
        extractor_cutoff: Value reported as feature computation cutoff time.
        cutoff: Value reported as performance computation cutoff time.
        test_case_data: Performance data of the test case. If None, no test
            variables are set and "testBool" stays false.

    Returns:
        A dict matching variables in the LaTeX template with their values.
    """
    actual_performance_data = get_portfolio_selector_performance(selection_scenario)
    solver_performance_ranking = performance_data.get_solver_ranking(
        objective=objective)
    single_best_solver = solver_performance_ranking[0][0]
    latex_dict = {"bibliographypath": bibliograpghy_path.absolute(),
                  "numSolvers": performance_data.num_solvers,
                  "solverList": stex.list_to_latex(
                      [(s, "") for s in performance_data.solvers])}

    # Scan the extractor directory once so count and list use the same snapshot
    extractor_entries = list(extractor_path.iterdir())
    latex_dict["numFeatureExtractors"] = len(
        [p for p in extractor_entries if p.is_dir()])
    # NOTE(review): the count considers directories only, the list all entries;
    # kept as in the original, confirm whether the list should be filtered too.
    latex_dict["featureExtractorList"] = stex.list_to_latex(
        [(f, "") for f in extractor_entries])

    latex_dict["numInstanceClasses"] = get_num_instance_sets(
        performance_data.instances)
    latex_dict["instanceClassList"] = get_instance_set_count_list(
        performance_data.instances)
    latex_dict["featureComputationCutoffTime"] = extractor_cutoff
    latex_dict["performanceComputationCutoffTime"] = cutoff

    rank_list_perfect = performance_data.marginal_contribution(objective, sort=True)
    rank_list_actual = compute_selector_marginal_contribution(performance_data,
                                                              feature_data,
                                                              selection_scenario,
                                                              objective)
    latex_dict["solverPerfectRankingList"] = solver_ranked_latex_list(
        rank_list_perfect)
    latex_dict["solverActualRankingList"] = solver_ranked_latex_list(
        rank_list_actual)
    latex_dict["PARRankingList"] = solver_ranked_latex_list(
        solver_performance_ranking, objective)
    latex_dict["VBSPAR"] = objective.instance_aggregator(
        performance_data.best_instance_performance(objective=objective.name))
    latex_dict["actualPAR"] = actual_performance_data.mean(objective=objective.name)
    latex_dict["metric"] = objective.name
    latex_dict["figure-portfolio-selector-sparkle-vs-sbs"] = \
        get_figure_portfolio_selector_vs_sbs(
            target_dir, objective, performance_data,
            actual_performance_data, single_best_solver)
    latex_dict["figure-portfolio-selector-sparkle-vs-vbs"] = \
        get_figure_portfolio_selector_sparkle_vs_vbs(
            target_dir, objective, performance_data, actual_performance_data)
    latex_dict["testBool"] = r"\testfalse"

    # Train and test
    if test_case_data is not None:
        test_set_name = test_case_data.csv_filepath.parent.name
        # Literal braces must be doubled in an f-string. The former "{ {x} }"
        # evaluated a set literal and emitted \textbf{'name'} with quotes.
        latex_dict["testInstanceClass"] = f"\\textbf{{{test_set_name}}}"
        latex_dict["numInstanceInTestInstanceClass"] = test_case_data.num_instances
        latex_dict["testActualPAR"] = test_case_data.mean(objective=objective.name)
        latex_dict["testBool"] = r"\testtrue"

    return latex_dict
def generate_report_selection(target_path: Path,
                              latex_dir: Path,
                              latex_template: Path,
                              bibliography_path: Path,
                              extractor_path: Path,
                              selection_scenario: Path,
                              feature_data: FeatureDataFrame,
                              train_data: PerformanceDataFrame,
                              objective: SparkleObjective,
                              extractor_cutoff: int,
                              cutoff: int,
                              test_case_data: PerformanceDataFrame = None) -> None:
    """Generate a report for algorithm selection.

    Args:
        target_path: Path where the output files will be placed. It is created
            if it does not exist yet.
        latex_dir: The latex dir
        latex_template: The template for the report
        bibliography_path: Path to the bib file.
        extractor_path: Path to the feature extractors
        selection_scenario: Path to the selector scenario
        feature_data: Feature data created by extractor
        train_data: The performance input data for the selector
        objective: The objective for the selector
        extractor_cutoff: The cutoff for the feature extractor, forwarded to
            the report variables.
        cutoff: The cutoff per solver
        test_case_data: Performance data of the test case. When given, the
            report is named "Sparkle_Report_for_Test". Defaults to None.
    """
    # Results on the test set are included only when test data is given
    has_test_data = test_case_data is not None
    report_name = Path(
        "Sparkle_Report_for_Test" if has_test_data else "Sparkle_Report")

    target_path.mkdir(parents=True, exist_ok=True)
    # Arguments are passed by position, in the order of the callee's signature
    report_variables = selection_report_variables(
        target_path, bibliography_path, extractor_path, selection_scenario,
        train_data, feature_data, objective, extractor_cutoff, cutoff,
        test_case_data)
    stex.generate_report(
        latex_dir, latex_template, target_path, report_name, report_variables)
260 latex_template,
261 target_path,
262 latex_report_filename,
263 dict_variable_to_value)