Coverage for sparkle/platform/generate_report_for_configuration.py: 84%
146 statements
« prev ^ index » next coverage.py v7.6.10, created at 2025-01-07 15:22 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2025-01-07 15:22 +0000
1#!/usr/bin/env python3
2# -*- coding: UTF-8 -*-
3"""Helper functions for algorithm configuration report generation."""
4from __future__ import annotations
6import sys
7from pathlib import Path
9from scipy.stats import linregress
11from sparkle.platform import latex as stex
12from sparkle.configurator.ablation import AblationScenario
13from sparkle.configurator.configurator import ConfigurationScenario
14from sparkle.instance import InstanceSet
15from sparkle.structures import PerformanceDataFrame
16from sparkle.types import SolverStatus
17from sparkle import about
19from sparkle.platform.output.configuration_output import ConfigurationOutput
def get_features_bool(configurator_scenario: ConfigurationScenario,
                      solver_name: str, train_set: InstanceSet) -> str:
    """Return a bool string for latex indicating whether features were used.

    True if a feature file is given in the scenario file, false otherwise.

    Args:
        configurator_scenario: Scenario whose ``directory`` holds the scenario file
        solver_name: Name of the solver
        train_set: Instance set used for training; its name is part of the file name

    Returns:
        A string describing whether features are used
    """
    scenario_file = configurator_scenario.directory \
        / f"{solver_name}_{train_set.name}_scenario.txt"

    # The context manager closes the file on every exit path, including the
    # early return taken as soon as a feature file entry is found.
    with scenario_file.open("r") as scenario_lines:
        for line in scenario_lines:
            if line.split(" ")[0] == "feature_file":
                return "\\featurestrue"
    return "\\featuresfalse"
def get_ablation_bool(scenario: AblationScenario | None) -> str:
    """Return the ablation bool as LaTeX string.

    Args:
        scenario: The ablation scenario, or None when no ablation was requested

    Returns:
        A string describing whether ablation was run or not
    """
    # Report generation passes its optional ``ablation`` argument (default None)
    # straight through, so None has to mean "no ablation" rather than crash.
    if scenario is not None and scenario.check_for_ablation():
        return "\\ablationtrue"
    return "\\ablationfalse"
def plot_configured_vs_default(
        config_output: ConfigurationOutput,
        config_scenario: ConfigurationScenario,
        target_directory: Path,
        test_mode: bool = False,) -> str:
    """Create a figure comparing the configured and default solver.

    Base function to create a comparison plot of a given instance set between the default
    and configured performance.

    Args:
        config_output: Object representation of the ConfigurationOutput
        config_scenario: ConfigurationScenario
        target_directory: Directory for the configuration reports
        test_mode: If True, plot the test instance set instead of the training set

    Returns:
        A string containing the latex command to include the figure
    """
    # Pick both series from the SAME instance set, so that every plotted point
    # pairs the default and configured performance of one and the same instance.
    if test_mode:
        instance_set_name = config_output.instance_set_test.name
        # NOTE(review): assumes ConfigurationOutput exposes the per-instance default
        # performance on the test set under this name, mirroring the train
        # attribute -- confirm against ConfigurationOutput.
        default_performance = config_output.default_performance_per_instance_test
        configured_performance = config_output.best_conf_performance_per_instance_test
    else:
        instance_set_name = config_output.instance_set_train.name
        default_performance = config_output.default_performance_per_instance_train
        configured_performance = config_output.best_conf_performance_per_instance_train

    figure_filename =\
        f"data_{config_output.solver.name}_configured_vs_default_on_{instance_set_name}"
    points = list(zip(default_performance, configured_performance))

    objective_name = config_scenario.sparkle_objective.name
    plot_params = {"xlabel": f"Default parameters [{objective_name}]",
                   "ylabel": f"Configured parameters [{objective_name}]",
                   "scale": "linear",
                   "limit_min": 1.5,
                   "limit_max": 1.5,
                   "replace_zeros": False,
                   "output_dir": target_directory
                   }

    # Check if the scale of the axis can be considered linear. The regression is
    # skipped when either axis holds a single distinct value.
    x_points = [point[0] for point in points]
    y_points = [point[1] for point in points]
    if len(set(x_points)) != 1 and len(set(y_points)) != 1:
        positions = range(len(points))
        linearity_x = linregress(x_points, positions).rvalue > 0.5
        linearity_y = linregress(y_points, positions).rvalue > 0.5
        if not linearity_x or not linearity_y:
            plot_params["scale"] = "log"
            plot_params["replace_zeros"] = True

    stex.generate_comparison_plot(points,
                                  figure_filename,
                                  **plot_params)

    return f"\\includegraphics[width=0.6\\textwidth]{{{figure_filename}}}"
def get_timeouts_instanceset(config_output: ConfigurationOutput,
                             instance_set: InstanceSet) -> tuple[int, int, int]:
    """Return the number of timeouts by configured, default and both on the instance set.

    Args:
        config_output: ConfigurationOutput holding the solver, the best configuration
            and the performance data
        instance_set: InstanceSet whose instances are inspected

    Returns:
        A tuple (configured timeouts, default timeouts, timeouts shared by both)
    """
    solver_key = str(config_output.solver.directory)
    instance_keys = [str(path) for path in instance_set.instance_paths]

    # The status objective is identified by its stem, case-insensitively
    status_objectives = [candidate
                         for candidate in config_output.performance_data.objectives
                         if candidate.stem.lower() == "status"]
    status_objective = status_objectives[0]

    # Per-instance statuses for the best configuration ...
    _, configured_statuses = config_output.performance_data.configuration_performance(
        solver_key,
        configuration=config_output.best_configuration,
        objective=status_objective,
        instances=instance_keys,
        per_instance=True)
    # ... and for the entry stored under the missing-value configuration,
    # which this function treats as the default.
    _, default_statuses = config_output.performance_data.configuration_performance(
        solver_key,
        configuration=PerformanceDataFrame.missing_value,
        objective=status_objective,
        instances=instance_keys,
        per_instance=True)

    n_configured, n_default, n_shared = 0, 0, 0
    for raw_configured, raw_default in zip(configured_statuses, default_statuses):
        configured_timed_out = SolverStatus(raw_configured) == SolverStatus.TIMEOUT
        default_timed_out = SolverStatus(raw_default) == SolverStatus.TIMEOUT
        if configured_timed_out:
            n_configured += 1
        if default_timed_out:
            n_default += 1
        if configured_timed_out and default_timed_out:
            n_shared += 1
    return n_configured, n_default, n_shared
def get_ablation_table(scenario: AblationScenario | None) -> str:
    """Generate a LaTeX table of the ablation path.

    This is the result of the ablation analysis to determine the parameter importance.
    Every row must have five cells: "Round", "Flipped parameter", "Source value",
    "Target value" and "Validation result". The first row is rendered in bold and
    followed by a horizontal line.

    Args:
        scenario: The ablation scenario to read the table from. If None, a table
            without rows is returned.

    Returns:
        A string containing the LaTeX table code of the ablation path
    """
    # Without a scenario there are no results to tabulate
    results = scenario.read_ablation_table() if scenario is not None else []
    row_end = " \\\\ "
    table_parts = [r"\begin{tabular}{rp{0.25\linewidth}rrr}"]

    for i, line in enumerate(results):
        # If this fails something has changed in the representation of ablation tables
        if len(line) != 5:
            print("ERROR: something has changed with the representation "
                  "of ablation tables")
            sys.exit(-1)
        if i == 0:
            line = [f"\\textbf{{{word}}}" for word in line]

        # Put multiple variable changes in one round on a separate line, but only
        # when parameters, source values and target values line up one-to-one.
        params = line[1].split(",")
        split_round = False
        if len(params) > 1:
            default_values = line[2].split(",")
            flipped_values = line[3].split(",")
            split_round = len(params) == len(default_values) == len(flipped_values)

        if split_round:
            last = len(params) - 1
            for index, cells in enumerate(zip(params, default_values, flipped_values)):
                # The round number is shown on the first subline only,
                # the validation result on the last subline only.
                round_label = line[0] if index == 0 else ""
                result = line[-1] if index == last else ""
                table_parts.append(" & ".join([round_label, *cells, result]) + row_end)
        else:
            table_parts.append(" & ".join(line) + row_end)

        if i == 0:
            table_parts.append("\\hline ")

    table_parts.append("\\end{tabular}")
    return "".join(table_parts)
def configuration_report_variables(config_scenario: ConfigurationScenario,
                                   config_output: ConfigurationOutput,
                                   target_dir: Path,
                                   bib_path: Path,
                                   extractor_dir: Path,
                                   extractor_cutoff: int,
                                   ablation: AblationScenario | None = None) -> dict:
    """Return a dict matching LaTeX variables and their values.

    Args:
        config_scenario: Object representation of the ConfigurationScenario
        config_output: Object representation of the ConfigurationOutput
        target_dir: Target directory
        bib_path: Path to the latex bib file.
        extractor_dir: General platform extractor Directory
        extractor_cutoff: Extractor cut off time.
        ablation: The ablation scenario, if ablation was run. Defaults to None.

    Returns:
        A dictionary containing the variables and values
    """
    has_test = config_output.instance_set_test is not None
    full_dict = {"bibliographypath": bib_path.absolute(),
                 "sparkleVersion": about.version}

    # NOTE(review): ``ablation`` is forwarded as-is, also when it is None.
    full_dict.update(get_dict_variable_to_value_common(config_scenario,
                                                       config_output,
                                                       ablation,
                                                       target_dir))

    if has_test:
        # Test set specific variables are only added when a test set exists
        full_dict.update(get_dict_variable_to_value_test(config_output,
                                                         config_scenario,
                                                         target_dir,
                                                         ablation))
    full_dict["testBool"] = f"\\test{str(has_test).lower()}"

    if ablation is None:
        full_dict["ablationBool"] = "\\ablationfalse"

    if full_dict["featuresBool"] == "\\featurestrue":
        # List the directory once so the reported count and the reported list
        # are guaranteed to describe the same set of extractors.
        extractors = list(extractor_dir.iterdir())
        full_dict["numFeatureExtractors"] = len(extractors)
        full_dict["featureExtractorList"] =\
            stex.list_to_latex([(extractor.name, "") for extractor in extractors])
        full_dict["featureComputationCutoffTime"] = extractor_cutoff

    return full_dict
def get_dict_variable_to_value_common(config_scenario: ConfigurationScenario,
                                      config_output: ConfigurationOutput,
                                      ablation: AblationScenario,
                                      target_directory: Path) -> dict:
    """Return a dict matching LaTeX variables and values used for all config. reports.

    Args:
        config_scenario: Configuration scenario
        config_output: Configuration output
        ablation: Ablation scenario, if ran
        target_directory: Path to directory with configuration reports

    Returns:
        A dictionary containing the variables and values
    """
    objective = config_scenario.sparkle_objective

    variables = {"objectiveName": objective.name,
                 "configuratorName": config_output.configurator.name,
                 "configuratorVersion": config_output.configurator.version,
                 "configuratorFullName": config_output.configurator.full_name}

    # Objective description: runtime vs. quality, minimise vs. maximise
    is_runtime = bool(objective.time)
    variables["runtimeBool"] = "\\runtimetrue" if is_runtime else "\\runtimefalse"
    variables["objectiveType"] = "RUNTIME" if is_runtime else "QUALITY"
    variables["minMaxAdjective"] = "lowest" if objective.minimise else "highest"

    variables["solver"] = config_output.solver.name
    variables["instanceSetTrain"] = config_scenario.instance_set.name
    variables["numInstanceInTrainingInstanceSet"] = config_scenario.instance_set.size

    variables["numConfiguratorRuns"] = config_scenario.number_of_runs
    # The total time budget lives under a different attribute per scenario kind
    if hasattr(config_scenario, "wallclock_time"):
        time_budget = config_scenario.wallclock_time
    elif hasattr(config_scenario, "smac3_scenario"):  # SMAC3
        time_budget = config_scenario.smac3_scenario.walltime_limit
    else:
        time_budget = config_scenario.max_time
    variables["wholeTimeBudget"] = time_budget
    variables["eachRunCutoffTime"] = config_scenario.cutoff_time

    # One "parameter: value" entry per parameter of the best configuration
    variables["optimisedConfiguration"] = stex.list_to_latex(
        [f"{key}: {value}" for key, value in config_output.best_configuration.items()])
    variables["optimisedConfigurationTrainingPerformance"] =\
        config_output.best_performance_train
    variables["defaultConfigurationTrainingPerformance"] =\
        config_output.default_performance_train

    variables["figure-configured-vs-default-train"] = plot_configured_vs_default(
        config_output, config_scenario, target_directory)

    # Retrieve timeout numbers for the training instances
    timeouts_configured, timeouts_default, timeouts_overlap =\
        get_timeouts_instanceset(config_output, config_output.instance_set_train)
    variables.update({"timeoutsTrainDefault": timeouts_default,
                      "timeoutsTrainConfigured": timeouts_configured,
                      "timeoutsTrainOverlap": timeouts_overlap})

    variables["ablationBool"] = get_ablation_bool(ablation)
    variables["ablationPath"] = get_ablation_table(ablation)
    variables["featuresBool"] = get_features_bool(
        config_scenario, config_output.solver.name, config_scenario.instance_set)

    return variables
def get_dict_variable_to_value_test(
        config_output: ConfigurationOutput,
        configuration_scenario: ConfigurationScenario,
        target_dir: Path,
        ablation: AblationScenario) -> dict:
    """Return a dict matching test set specific latex variables with their values.

    Args:
        config_output: Configuration output
        configuration_scenario: Configuration scenario
        target_dir: Path to where output should go
        ablation: Ablation scenario, if ran

    Returns:
        A dictionary containing the variables and their values
    """
    test_set = config_output.instance_set_test
    # Values are evaluated top to bottom, so the figure is generated last
    variables = {
        "instanceSetTest": test_set.name,
        "numInstanceInTestingInstanceSet": test_set.size,
        "optimisedConfigurationTestingPerformance": config_output.best_performance_test,
        "defaultConfigurationTestingPerformance": config_output.default_performance_test,
        "figure-configured-vs-default-test": plot_configured_vs_default(
            config_output, configuration_scenario, target_dir, test_mode=True),
    }

    # Retrieve timeout numbers for the testing instances
    timeouts_configured, timeouts_default, timeouts_overlap =\
        get_timeouts_instanceset(config_output, config_output.instance_set_test)
    variables["timeoutsTestDefault"] = timeouts_default
    variables["timeoutsTestConfigured"] = timeouts_configured
    variables["timeoutsTestOverlap"] = timeouts_overlap

    variables["ablationBool"] = get_ablation_bool(ablation)
    variables["ablationPath"] = get_ablation_table(ablation)
    return variables
def generate_report_for_configuration(config_scenario: ConfigurationScenario,
                                      config_output: ConfigurationOutput,
                                      extractor_dir: Path,
                                      target_path: Path,
                                      latex_template_path: Path,
                                      bibliography_path: Path,
                                      extractor_cuttoff: int,
                                      ablation: AblationScenario = None) -> None:
    """Generate a report for algorithm configuration.

    Creates the target directory if needed, collects the LaTeX variables for the
    scenario and hands them to the LaTeX report generator.

    Args:
        config_scenario: The configuration scenario to report
        config_output: The configuration output object of the scenario
        extractor_dir: Path to the extractor used
        target_path: Where the report files will be placed.
        latex_template_path: Path to the template to use for the report
        bibliography_path: The bib corresponding to the latex template
        extractor_cuttoff: Cut off for extractor
        ablation: The ablation scenario if ablation was run.
    """
    template_name = "template-Sparkle-for-configuration.tex"
    report_name = "Sparkle_Report_for_Configuration"

    # The report directory must exist before anything is written into it
    target_path.mkdir(parents=True, exist_ok=True)
    report_variables = configuration_report_variables(
        config_scenario=config_scenario,
        config_output=config_output,
        target_dir=target_path,
        bib_path=bibliography_path,
        extractor_dir=extractor_dir,
        extractor_cutoff=extractor_cuttoff,
        ablation=ablation)
    stex.generate_report(latex_template_path, template_name, target_path,
                         report_name, report_variables)