Coverage for sparkle/platform/generate_report_for_configuration.py: 83%
148 statements
« prev ^ index » next coverage.py v7.8.0, created at 2025-04-03 10:42 +0000
« prev ^ index » next coverage.py v7.8.0, created at 2025-04-03 10:42 +0000
1#!/usr/bin/env python3
2# -*- coding: UTF-8 -*-
3"""Helper functions for algorithm configuration report generation."""
4from __future__ import annotations
6import sys
7from pathlib import Path
9from scipy.stats import linregress
11from sparkle.platform import latex as stex
12from sparkle.configurator.ablation import AblationScenario
13from sparkle.configurator.configurator import ConfigurationScenario
14from sparkle.instance import InstanceSet
15from sparkle.structures import PerformanceDataFrame
16from sparkle.types import SolverStatus
17from sparkle import about
19from sparkle.platform.output.configuration_output import ConfigurationOutput
def get_features_bool(configurator_scenario: ConfigurationScenario,
                      solver_name: str, train_set: InstanceSet) -> str:
    """Return a bool string for latex indicating whether features were used.

    True if a feature file is given in the scenario file, false otherwise.

    Args:
        configurator_scenario: Scenario whose ``directory`` contains the
            scenario file to inspect
        solver_name: Name of the solver
        train_set: Instance set used for training; its ``name`` is part of the
            scenario file name

    Returns:
        A string describing whether features are used
    """
    scenario_file = configurator_scenario.directory \
        / f"{solver_name}_{train_set.name}_scenario.txt"

    # Open through a context manager so the handle is closed on every exit
    # path, including the early return below.
    with scenario_file.open("r") as scenario_handle:
        for line in scenario_handle:
            # The first space-separated token of a line is the setting name
            if line.split(" ")[0] == "feature_file":
                return "\\featurestrue"
    return "\\featuresfalse"
def get_ablation_bool(scenario: AblationScenario) -> str:
    """Return the ablation bool as LaTeX string.

    Args:
        scenario: The ablation scenario to check, or None when no ablation
            scenario is available.

    Returns:
        A string describing whether ablation was run or not
    """
    # Callers in this module default their ablation argument to None; treat
    # that as "no ablation" instead of failing on the attribute lookup.
    if scenario is not None and scenario.check_for_ablation():
        return "\\ablationtrue"
    return "\\ablationfalse"
def plot_configured_vs_default(
        config_output: ConfigurationOutput,
        config_scenario: ConfigurationScenario,
        target_directory: Path,
        test_mode: bool = False,) -> str:
    """Create a figure comparing the configured and default solver.

    Base function to create a comparison plot of a given instance set between the default
    and configured performance.

    Args:
        config_output: Object representation of the ConfigurationOutput
        config_scenario: ConfigurationScenario
        target_directory: Directory for the configuration reports
        test_mode: If True, plot the test instance set instead of the training set

    Returns:
        A string containing the latex command to include the figure
    """
    if test_mode:
        instance_set_name = config_output.instance_set_test.name
        # Each point must pair default and configured results of the same
        # instance set, so the test plot uses the test values on both axes.
        # NOTE(review): assumes ConfigurationOutput exposes
        # default_performance_per_instance_test next to its train counterpart
        # -- confirm against the class definition.
        points = list(zip(config_output.default_performance_per_instance_test,
                          config_output.best_conf_performance_per_instance_test))
    else:
        instance_set_name = config_output.instance_set_train.name
        points = list(zip(config_output.default_performance_per_instance_train,
                          config_output.best_conf_performance_per_instance_train))
    figure_filename =\
        f"data_{config_output.solver.name}_configured_vs_default_on_{instance_set_name}"

    objective_name = config_scenario.sparkle_objective.name
    plot_params = {"xlabel": f"Default parameters [{objective_name}]",
                   "ylabel": f"Configured parameters [{objective_name}]",
                   "scale": "linear",
                   "limit_min": 1.5,
                   "limit_max": 1.5,
                   "replace_zeros": False,
                   "output_dir": target_directory
                   }
    # Check if the scale of the axis can be considered linear
    x_points = [p[0] for p in points]
    y_points = [p[1] for p in points]
    # The regression is skipped when either axis holds a single distinct value
    if len(set(x_points)) != 1 and len(set(y_points)) != 1:
        sample_index = range(len(points))
        linearity_x = linregress(x_points, sample_index).rvalue > 0.5
        linearity_y = linregress(y_points, sample_index).rvalue > 0.5
        if not linearity_x or not linearity_y:
            plot_params["scale"] = "log"
            plot_params["replace_zeros"] = True

    stex.generate_comparison_plot(points,
                                  figure_filename,
                                  **plot_params)

    return f"\\includegraphics[width=0.6\\textwidth]{{{figure_filename}}}"
def get_timeouts_instanceset(config_output: ConfigurationOutput,
                             instance_set: InstanceSet) -> tuple[int, int, int]:
    """Count the timeouts of the configured and default solver on an instance set.

    Args:
        config_output: ConfigurationOutput holding the solver, its best
            configuration and the performance data
        instance_set: InstanceSet whose instances are inspected

    Returns:
        A tuple with the number of timeouts of the configured solver, of the
        default solver, and of instances on which both timed out
    """
    performance_data = config_output.performance_data
    solver_key = str(config_output.solver.directory)
    instance_keys = [str(path) for path in instance_set.instance_paths]

    # The status objective is the first one whose stem is "status" (any case)
    status_objectives = [candidate for candidate in performance_data.objectives
                         if candidate.stem.lower() == "status"]
    objective = status_objectives[0]

    _, configured_raw = performance_data.configuration_performance(
        solver_key,
        configuration=config_output.best_configuration,
        objective=objective,
        instances=instance_keys,
        per_instance=True)
    _, default_raw = performance_data.configuration_performance(
        solver_key,
        configuration=PerformanceDataFrame.missing_value,
        objective=objective,
        instances=instance_keys,
        per_instance=True)

    configured_timeouts = 0
    default_timeouts = 0
    shared = 0
    for raw_configured, raw_default in zip(configured_raw, default_raw):
        status_pair = (SolverStatus(raw_configured), SolverStatus(raw_default))
        configured_timed_out = status_pair[0] == SolverStatus.TIMEOUT
        default_timed_out = status_pair[1] == SolverStatus.TIMEOUT
        if configured_timed_out:
            configured_timeouts += 1
        if default_timed_out:
            default_timeouts += 1
        if configured_timed_out and default_timed_out:
            shared += 1
    return configured_timeouts, default_timeouts, shared
def get_ablation_table(scenario: AblationScenario) -> str:
    """Generate a LaTeX table of the ablation path.

    This is the result of the ablation analysis to determine the parameter importance.

    Args:
        scenario: The ablation scenario; its ``read_ablation_table()`` result is
            rendered, with the first row used as the (bold) header.

    Returns:
        A string containing the LaTeX table code of the ablation path
    """
    results = scenario.read_ablation_table()
    # Fragments are collected in a list and joined once at the end
    fragments = [r"\begin{tabular}{rp{0.25\linewidth}rrr}"]
    # "Round", "Flipped parameter", "Source value", "Target value", "Validation result"
    for i, line in enumerate(results):
        # If this fails something has changed in the representation of ablation tables
        if len(line) != 5:
            print("ERROR: something has changed with the representation "
                  "of ablation tables")
            sys.exit(-1)
        if i == 0:
            line = [f"\\textbf{{{word}}}" for word in line]

        # Put multiple variable changes in one round on a separate line, but
        # only when parameters, source values and target values line up.
        params = line[1].split(",")
        split_rows = False
        if len(params) > 1:
            default_values = line[2].split(",")
            flipped_values = line[3].split(",")
            split_rows = len(params) == len(default_values) == len(flipped_values)

        if split_rows:
            last_index = len(params) - 1
            for index, param in enumerate(params):
                # The round label goes on the first sub-row, the result on the last
                round_label = line[0] if index == 0 else ""
                result = line[-1] if index == last_index else ""
                cells = [round_label, param, default_values[index],
                         flipped_values[index], result]
                fragments.append(" & ".join(cells) + " \\\\ ")
        else:
            fragments.append(" & ".join(line) + " \\\\ ")
        if i == 0:
            fragments.append("\\hline ")
    fragments.append("\\end{tabular}")

    return "".join(fragments)
def configuration_report_variables(config_scenario: ConfigurationScenario,
                                   config_output: ConfigurationOutput,
                                   target_dir: Path,
                                   bib_path: Path,
                                   extractor_dir: Path,
                                   extractor_cutoff: int,
                                   ablation: AblationScenario = None) -> dict:
    """Return a dict matching LaTeX variables and their values.

    Args:
        config_scenario: Object representation of the ConfigurationScenario
        config_output: Object representation of the ConfigurationOutput
        target_dir: Target directory
        bib_path: Path to the latex bib file.
        extractor_dir: General platform extractor Directory
        extractor_cutoff: Extractor cut off time.
        ablation: The ablation scenario, if any. Defaults to None.

    Returns:
        A dictionary containing the variables and values
    """
    has_test = config_output.instance_set_test is not None
    full_dict = {"bibliographypath": bib_path.absolute(),
                 "sparkleVersion": about.version}

    full_dict.update(get_dict_variable_to_value_common(config_scenario,
                                                       config_output,
                                                       ablation,
                                                       target_dir))

    if has_test:
        test_dict = get_dict_variable_to_value_test(config_output,
                                                    config_scenario,
                                                    target_dir,
                                                    ablation)
        full_dict.update(test_dict)
    full_dict["testBool"] = f"\\test{str(has_test).lower()}"

    if ablation is None:
        full_dict["ablationBool"] = "\\ablationfalse"

    if full_dict["featuresBool"] == "\\featurestrue":
        # List the directory once so the reported count and the reported
        # names always come from the same snapshot.
        extractors = list(extractor_dir.iterdir())
        full_dict["numFeatureExtractors"] = len(extractors)
        full_dict["featureExtractorList"] =\
            stex.list_to_latex([(p.name, "") for p in extractors])
        full_dict["featureComputationCutoffTime"] = extractor_cutoff

    return full_dict
def get_dict_variable_to_value_common(config_scenario: ConfigurationScenario,
                                      config_output: ConfigurationOutput,
                                      ablation: AblationScenario,
                                      target_directory: Path) -> dict:
    """Return a dict matching LaTeX variables and values used for all config. reports.

    Args:
        config_scenario: Configuration scenario
        config_output: configuration output
        ablation: Ablation scenario, if ran. May be None.
        target_directory: Path to directory with configuration reports

    Returns:
        A dictionary containing the variables and values
    """
    objective = config_scenario.sparkle_objective

    latex_dict = {"objectiveName": objective.name,
                  "configuratorName": config_output.configurator.name,
                  "configuratorVersion": config_output.configurator.version,
                  "configuratorFullName": config_output.configurator.full_name}

    if objective.time:
        latex_dict["runtimeBool"] = "\\runtimetrue"
        latex_dict["objectiveType"] = "RUNTIME"
    else:
        latex_dict["runtimeBool"] = "\\runtimefalse"
        latex_dict["objectiveType"] = "QUALITY"
    if objective.minimise:
        latex_dict["minMaxAdjective"] = "lowest"
    else:
        latex_dict["minMaxAdjective"] = "highest"

    latex_dict["solver"] = config_output.solver.name
    latex_dict["instanceSetTrain"] = config_scenario.instance_set.name
    latex_dict["numInstanceInTrainingInstanceSet"] = config_scenario.instance_set.size

    latex_dict["numConfiguratorRuns"] = config_scenario.number_of_runs
    # The attribute holding the overall time budget differs per scenario type
    if hasattr(config_scenario, "tuner_timeout"):  # ParamILS
        latex_dict["wholeTimeBudget"] = config_scenario.tuner_timeout
    elif hasattr(config_scenario, "wallclock_time"):  # SMAC2 / IRACE
        latex_dict["wholeTimeBudget"] = config_scenario.wallclock_time
    elif hasattr(config_scenario, "smac3_scenario"):  # SMAC3
        latex_dict["wholeTimeBudget"] = config_scenario.smac3_scenario.walltime_limit
    else:
        latex_dict["wholeTimeBudget"] = config_scenario.max_time
    latex_dict["eachRunCutoffTime"] = config_scenario.cutoff_time

    opt_config_list = [f"{key}: {value}" for key, value in
                       config_output.best_configuration.items()]
    latex_dict["optimisedConfiguration"] = stex.list_to_latex(opt_config_list)
    latex_dict["optimisedConfigurationTrainingPerformance"] =\
        config_output.best_performance_train
    latex_dict["defaultConfigurationTrainingPerformance"] =\
        config_output.default_performance_train

    latex_dict["figure-configured-vs-default-train"] = plot_configured_vs_default(
        config_output,
        config_scenario,
        target_directory)

    # Retrieve timeout numbers for the training instances
    configured_timeouts_train, default_timeouts_train, overlapping_timeouts_train =\
        get_timeouts_instanceset(
            config_output,
            config_output.instance_set_train)

    latex_dict["timeoutsTrainDefault"] = default_timeouts_train
    latex_dict["timeoutsTrainConfigured"] = configured_timeouts_train
    latex_dict["timeoutsTrainOverlap"] = overlapping_timeouts_train
    if ablation is None:
        # Without an ablation scenario there is nothing to query; report
        # "no ablation" rather than failing on a None attribute access.
        # NOTE(review): an empty ablationPath is assumed to be acceptable to
        # the report template when ablation is off -- confirm.
        latex_dict["ablationBool"] = "\\ablationfalse"
        latex_dict["ablationPath"] = ""
    else:
        latex_dict["ablationBool"] = get_ablation_bool(ablation)
        latex_dict["ablationPath"] = get_ablation_table(ablation)
    latex_dict["featuresBool"] = get_features_bool(
        config_scenario, config_output.solver.name, config_scenario.instance_set)

    return latex_dict
def get_dict_variable_to_value_test(
        config_output: ConfigurationOutput,
        configuration_scenario: ConfigurationScenario,
        target_dir: Path,
        ablation: AblationScenario) -> dict:
    """Return a dict matching test set specific latex variables with their values.

    Args:
        config_output: Configuration output
        configuration_scenario: Configuration scenario
        target_dir: Path to where output should go
        ablation: Ablation scenario, if ran. May be None.

    Returns:
        A dictionary containing the variables and their values
    """
    test_dict = {"instanceSetTest": config_output.instance_set_test.name}
    test_dict["numInstanceInTestingInstanceSet"] = config_output.instance_set_test.size
    test_dict["optimisedConfigurationTestingPerformance"] =\
        config_output.best_performance_test
    test_dict["defaultConfigurationTestingPerformance"] =\
        config_output.default_performance_test

    test_dict["figure-configured-vs-default-test"] =\
        plot_configured_vs_default(
            config_output, configuration_scenario,
            target_dir, test_mode=True)

    # Retrieve timeout numbers for the testing instances
    configured_timeouts_test, default_timeouts_test, overlapping_timeouts_test =\
        get_timeouts_instanceset(config_output,
                                 config_output.instance_set_test)

    test_dict["timeoutsTestDefault"] = default_timeouts_test
    test_dict["timeoutsTestConfigured"] = configured_timeouts_test
    test_dict["timeoutsTestOverlap"] = overlapping_timeouts_test
    if ablation is None:
        # Without an ablation scenario there is nothing to query; report
        # "no ablation" rather than failing on a None attribute access.
        # NOTE(review): an empty ablationPath is assumed to be acceptable to
        # the report template when ablation is off -- confirm.
        test_dict["ablationBool"] = "\\ablationfalse"
        test_dict["ablationPath"] = ""
    else:
        test_dict["ablationBool"] = get_ablation_bool(ablation)
        test_dict["ablationPath"] = get_ablation_table(ablation)
    return test_dict
def generate_report_for_configuration(config_scenario: ConfigurationScenario,
                                      config_output: ConfigurationOutput,
                                      extractor_dir: Path,
                                      target_path: Path,
                                      latex_template_path: Path,
                                      bibliography_path: Path,
                                      extractor_cuttoff: int,
                                      ablation: AblationScenario = None) -> None:
    """Build the algorithm configuration report in the target directory.

    Creates ``target_path`` if needed, collects the LaTeX variables for the
    scenario and hands them to the LaTeX report generator.

    Args:
        config_scenario: The configuration scenario to report
        config_output: The configuration output object of the scenario
        extractor_dir: Path to the extractor used
        target_path: Where the report files will be placed.
        latex_template_path: Path to the template to use for the report
        bibliography_path: The bib corresponding to the latex template
        extractor_cuttoff: Cut off for extractor
        ablation: The ablation scenario if ablation was run.
    """
    template_name = "template-Sparkle-for-configuration.tex"
    report_name = "Sparkle_Report_for_Configuration"

    # The output directory must exist before variables (and plots) are produced
    target_path.mkdir(parents=True, exist_ok=True)
    report_variables = configuration_report_variables(
        config_scenario,
        config_output,
        target_path,
        bibliography_path,
        extractor_dir,
        extractor_cuttoff,
        ablation,
    )
    stex.generate_report(
        latex_template_path,
        template_name,
        target_path,
        report_name,
        report_variables,
    )