#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
"""Helper functions for algorithm configuration report generation."""
from __future__ import annotations

import sys
from pathlib import Path

from scipy.stats import linregress

from sparkle.platform import latex as stex
from sparkle.solver.ablation import AblationScenario
from sparkle.solver.validator import Validator
from sparkle.configurator.configurator import Configurator, ConfigurationScenario
from sparkle.solver import Solver
from sparkle.instance import InstanceSet
from sparkle.types import SparkleObjective
from sparkle import about


def get_features_bool(configurator_scenario: ConfigurationScenario,
                      solver_name: str, train_set: InstanceSet) -> str:
    """Return a bool string for latex indicating whether features were used.

    True if a feature file is given in the scenario file, false otherwise.

    Args:
        configurator_scenario: Scenario whose scenario file is inspected
        solver_name: Name of the solver
        train_set: The instance set used for training

    Returns:
        A string describing whether features are used
    """
    scenario_file = configurator_scenario.directory \
        / f"{solver_name}_{train_set.name}_scenario.txt"

    with scenario_file.open("r") as scenario:
        for line in scenario:
            if line.split(" ")[0] == "feature_file":
                return "\\featurestrue"
    return "\\featuresfalse"
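

# A scenario file line that triggers "\featurestrue" would look like this
# (hypothetical path):
#   feature_file = Features/feature_data.csv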


def get_average_performance(results: list[list[str]],
                            objective: SparkleObjective) -> float:
    """Return the average performance over the instances in the given results.

    Args:
        results: Results from CSV, including a header row
        objective: The objective to average

    Returns:
        Average performance value
    """
    instance_per_dict = get_dict_instance_to_performance(results,
                                                         objective)
    num_instances = len(instance_per_dict)
    return sum(instance_per_dict.values()) / num_instances
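

# Example (hypothetical values): if get_dict_instance_to_performance yields
# {"i1.cnf": 10.0, "i2.cnf": 20.0}, the average performance is
# (10.0 + 20.0) / 2 = 15.0.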


def get_dict_instance_to_performance(results: list[list[str]],
                                     objective: SparkleObjective) -> dict[str, float]:
    """Return a dictionary of instance names and their performance.

    Args:
        results: Results from CSV, including a header row
        objective: The Sparkle Objective we are converting for

    Returns:
        A dictionary containing the performance for each instance
    """
    # The header row determines the objective column; column 3 holds the instance path
    value_column = results[0].index(objective.name)
    results_per_instance = {}
    for row in results[1:]:
        value = float(row[value_column])
        results_per_instance[Path(row[3]).name] = value
    return results_per_instance
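

# Example (hypothetical rows): with the header ["Solver", "Configuration", "Seed",
# "Instance", "PAR10"] and objective.name == "PAR10", the row
# ["s", "", "1", "Instances/Train/i1.cnf", "12.3"] maps to {"i1.cnf": 12.3}.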


def get_ablation_bool(scenario: AblationScenario) -> str:
    """Return the ablation bool as LaTeX string.

    Args:
        scenario: The ablation scenario to check for results

    Returns:
        A string describing whether ablation was run or not
    """
    if scenario.check_for_ablation():
        return "\\ablationtrue"
    return "\\ablationfalse"


def get_data_for_plot(configured_results: list[list[str]],
                      default_results: list[list[str]],
                      objective: SparkleObjective) -> list:
    """Return the required data to plot.

    Creates a nested list of performance values of algorithm runs with default and
    configured parameters on instances in a given instance set.

    Args:
        configured_results: Results for the configured solver
        default_results: Results for the default solver
        objective: The objective of the results

    Returns:
        A list of lists containing data points
    """
    dict_instance_to_par_default = get_dict_instance_to_performance(
        default_results, objective)
    dict_instance_to_par_configured = get_dict_instance_to_performance(
        configured_results, objective)

    instances = (dict_instance_to_par_default.keys()
                 & dict_instance_to_par_configured.keys())
    if len(dict_instance_to_par_default) != len(instances):
        print("ERROR: Number of instances does not match the number of performance "
              "values for the default configuration.")
        sys.exit(-1)
    points = []
    for instance in instances:
        point = [dict_instance_to_par_default[instance],
                 dict_instance_to_par_configured[instance]]
        points.append(point)

    return points
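

# Example (hypothetical values): an instance with a default performance of 600.0
# and a configured performance of 12.3 yields the point [600.0, 12.3], i.e. the
# default value is plotted on the x-axis and the configured value on the y-axis.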


def get_figure_configure_vs_default(configured_results: list[list[str]],
                                    default_results: list[list[str]],
                                    target_directory: Path,
                                    figure_filename: str,
                                    performance_measure: str,
                                    run_cutoff_time: float,
                                    objective: SparkleObjective) -> str:
    """Create a figure comparing the configured and default solver.

    Base function to create a comparison plot of a given instance set between the
    default and configured performance.

    Args:
        configured_results: Results for the configured solver
        default_results: Results for the default solver
        target_directory: Directory for the configuration reports
        figure_filename: Filename for the figure
        performance_measure: Label of the performance measure for the axes
        run_cutoff_time: Cutoff time
        objective: The objective of the results

    Returns:
        A string containing the LaTeX command to include the figure
    """
    points = get_data_for_plot(configured_results, default_results,
                               objective)

    plot_params = {"xlabel": f"Default parameters [{performance_measure}]",
                   "ylabel": f"Configured parameters [{performance_measure}]",
                   "scale": "linear",
                   "limit_min": 1.5,
                   "limit_max": 1.5,
                   "replace_zeros": False,
                   "output_dir": target_directory
                   }
    # Check if the scale of the axis can be considered linear: if the values on
    # either axis correlate weakly (rvalue <= 0.5) with their position in the
    # point list, switch to a log scale instead
    linearity_x = linregress([p[0] for p in points], range(len(points))).rvalue > 0.5
    linearity_y = linregress([p[1] for p in points], range(len(points))).rvalue > 0.5
    if not linearity_x or not linearity_y:
        plot_params["scale"] = "log"
        plot_params["replace_zeros"] = True

    stex.generate_comparison_plot(points,
                                  figure_filename,
                                  **plot_params)

    return f"\\includegraphics[width=0.6\\textwidth]{{{figure_filename}}}"


def get_figure_configured_vs_default_on_instance_set(solver: Solver,
                                                     instance_set_name: str,
                                                     res_default: list[list[str]],
                                                     res_conf: list[list[str]],
                                                     target_directory: Path,
                                                     objective_type: str,
                                                     run_cutoff_time: float,
                                                     objective: SparkleObjective,
                                                     data_type: str = "train") -> str:
    """Create a figure comparing the configured and default solver on an instance set.

    Manages the creation of a comparison plot of the instances in an instance set
    for the report by gathering the proper files and choosing the plotting parameters
    based on the performance measure.

    Args:
        solver: The solver object
        instance_set_name: Name of the instance set
        res_default: Results for the default solver
        res_conf: Results for the configured solver
        target_directory: Directory for the configuration reports
        objective_type: The type of objective (RUNTIME or QUALITY)
        run_cutoff_time: Cutoff time
        objective: The objective of the results
        data_type: Whether the instance set is used for "train" or "test"

    Returns:
        A string containing the LaTeX command to include the figure
    """
    data_plot_configured_vs_default_on_instance_set_filename = (
        f"data_{solver.name}_configured_vs_default_on_{instance_set_name}_{data_type}")
    return get_figure_configure_vs_default(
        res_conf, res_default, target_directory,
        data_plot_configured_vs_default_on_instance_set_filename,
        objective_type,
        run_cutoff_time,
        objective)


def get_timeouts_instanceset(solver: Solver,
                             instance_set: InstanceSet,
                             configurator: Configurator,
                             validator: Validator,
                             configuration_scenario: ConfigurationScenario,
                             cutoff: float) -> tuple[int, int, int]:
    """Return the number of timeouts by configured, default and both on an instance set.

    Args:
        solver: The solver object
        instance_set: Instance Set
        configurator: Configurator
        validator: Validator
        configuration_scenario: Scenario from which the optimal configuration is taken
        cutoff: Cutoff time

    Returns:
        A tuple containing the number of timeouts for the different configurations
    """
    objective = configuration_scenario.sparkle_objective
    _, config = configurator.get_optimal_configuration(configuration_scenario)
    res_default = validator.get_validation_results(solver,
                                                   instance_set,
                                                   config="")
    res_conf = validator.get_validation_results(solver,
                                                instance_set,
                                                config=config)
    dict_instance_to_par_configured = get_dict_instance_to_performance(
        res_conf, objective)
    dict_instance_to_par_default = get_dict_instance_to_performance(
        res_default, objective)

    return get_timeouts(dict_instance_to_par_configured,
                        dict_instance_to_par_default, cutoff)


def get_timeouts(instance_to_par_configured: dict,
                 instance_to_par_default: dict,
                 cutoff: float) -> tuple[int, int, int]:
    """Return the number of timeouts for given dicts.

    Args:
        instance_to_par_configured: Per-instance performance of the configured solver
        instance_to_par_default: Per-instance performance of the default solver
        cutoff: Cutoff value

    Returns:
        A tuple containing timeout values
    """
    configured_timeouts = 0
    default_timeouts = 0
    overlapping_timeouts = 0

    for instance in instance_to_par_configured:
        configured_par = instance_to_par_configured[instance]
        default_par = instance_to_par_default[instance]
        # Count the values that exceed the cutoff, i.e. the timeouts
        configured_timeouts += (configured_par > cutoff)
        default_timeouts += (default_par > cutoff)
        overlapping_timeouts += (configured_par > cutoff
                                 and default_par > cutoff)

    return configured_timeouts, default_timeouts, overlapping_timeouts
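

# Example (hypothetical values): with a cutoff of 60.0,
#   get_timeouts({"a": 61.0, "b": 10.0}, {"a": 61.0, "b": 61.0}, 60.0)
# returns (1, 2, 1): one configured timeout, two default timeouts, one shared.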


def get_ablation_table(scenario: AblationScenario) -> str:
    """Generate a LaTeX table of the ablation path.

    This is the result of the ablation analysis to determine the parameter importance.

    Args:
        scenario: The ablation scenario holding the ablation table

    Returns:
        A string containing the LaTeX table code of the ablation path
    """
    results = scenario.read_ablation_table()
    table_string = r"\begin{tabular}{rp{0.25\linewidth}rrr}"
    # "Round", "Flipped parameter", "Source value", "Target value", "Validation result"
    for i, line in enumerate(results):
        # If this fails something has changed in the representation of ablation tables
        if len(line) != 5:
            print("ERROR: something has changed with the representation "
                  "of ablation tables")
            sys.exit(-1)
        if i == 0:
            line = [f"\\textbf{{{word}}}" for word in line]

        # Put multiple variable changes in one round on separate lines
        if (len(line[1].split(",")) > 1
                and len(line[1].split(",")) == len(line[2].split(","))
                and len(line[1].split(",")) == len(line[3].split(","))):
            params = line[1].split(",")
            default_values = line[2].split(",")
            flipped_values = line[3].split(",")

            sublines = len(params)
            for subline in range(sublines):
                round_label = "" if subline != 0 else line[0]
                result = "" if subline + 1 != sublines else line[-1]
                printline = [round_label, params[subline], default_values[subline],
                             flipped_values[subline], result]
                table_string += " & ".join(printline) + " \\\\ "
        else:
            table_string += " & ".join(line) + " \\\\ "
            if i == 0:
                table_string += "\\hline "
    table_string += "\\end{tabular}"

    return table_string
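

# Sketch of the generated LaTeX (hypothetical table contents):
#   \begin{tabular}{rp{0.25\linewidth}rrr}\textbf{Round} & ... \\ \hline
#   1 & some_param & 0 & 1 & 42.0 \\ \end{tabular}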


def configuration_report_variables(target_dir: Path,
                                   solver: Solver,
                                   configurator: Configurator,
                                   validator: Validator,
                                   extractor_dir: Path,
                                   bib_path: Path,
                                   config_scenario: ConfigurationScenario,
                                   extractor_cuttoff: int,
                                   instance_set_test: InstanceSet = None,
                                   ablation: AblationScenario = None) -> dict:
    """Return a dict matching LaTeX variables and their values.

    Args:
        target_dir: Path to where the report is placed
        solver: Object representation of the Solver
        configurator: Configurator used for the configuration
        validator: Validator that provided the validation results
        extractor_dir: Path to the directory with feature extractors
        bib_path: Path to the bibliography of the report
        config_scenario: The configuration scenario to report
        extractor_cuttoff: Cutoff time for feature extraction
        instance_set_test: Instance set for testing. Defaults to None.
        ablation: The ablation scenario, if ablation was run. Defaults to None.

    Returns:
        A dictionary containing the variables and values
    """
    has_test = instance_set_test is not None

    full_dict = get_dict_variable_to_value_common(solver,
                                                  configurator,
                                                  validator,
                                                  ablation,
                                                  bib_path,
                                                  config_scenario,
                                                  target_dir)

    if has_test:
        test_dict = get_dict_variable_to_value_test(target_dir,
                                                    solver,
                                                    configurator,
                                                    validator,
                                                    ablation,
                                                    config_scenario,
                                                    instance_set_test)
        full_dict.update(test_dict)
    full_dict["testBool"] = f"\\test{str(has_test).lower()}"

    if ablation is None:
        full_dict["ablationBool"] = "\\ablationfalse"

    if full_dict["featuresBool"] == "\\featurestrue":
        full_dict["numFeatureExtractors"] =\
            len([p for p in extractor_dir.iterdir()])
        full_dict["featureExtractorList"] =\
            stex.list_to_latex([(p.name, "") for p in extractor_dir.iterdir()])
        full_dict["featureComputationCutoffTime"] = extractor_cuttoff

    return full_dict


def get_dict_variable_to_value_common(solver: Solver,
                                      configurator: Configurator,
                                      validator: Validator,
                                      ablation: AblationScenario,
                                      bibliography_path: Path,
                                      config_scenario: ConfigurationScenario,
                                      target_directory: Path) -> dict:
    """Return a dict matching LaTeX variables and values used for all config. reports.

    Args:
        solver: The solver object
        configurator: Configurator used for the configuration
        validator: Validator that provided the validation results
        ablation: The ablation scenario, if ablation was run
        bibliography_path: Path to the bibliography of the report
        config_scenario: The configuration scenario to report
        target_directory: Path to directory with configuration reports

    Returns:
        A dictionary containing the variables and values
    """
    objective = config_scenario.sparkle_objective
    _, opt_config = configurator.get_optimal_configuration(config_scenario)
    res_default = validator.get_validation_results(
        solver, config_scenario.instance_set, config="")
    res_conf = validator.get_validation_results(
        solver, config_scenario.instance_set, config=opt_config)
    instance_names = set([res[3] for res in res_default])
    opt_config_list = [f"{key}: {value}" for key, value in
                       Solver.config_str_to_dict(opt_config).items()]

    latex_dict = {"bibliographypath": bibliography_path.absolute(),
                  "objectiveName": objective.name,
                  "configuratorName": configurator.name,
                  "configuratorVersion": configurator.version,
                  "configuratorFullName": configurator.full_name,
                  }

    if objective.time:
        latex_dict["runtimeBool"] = "\\runtimetrue"
        latex_dict["objectiveType"] = "RUNTIME"
    else:
        latex_dict["runtimeBool"] = "\\runtimefalse"
        latex_dict["objectiveType"] = "QUALITY"
    if objective.minimise:
        latex_dict["minMaxAdjective"] = "lowest"
    else:
        latex_dict["minMaxAdjective"] = "highest"

    latex_dict["solver"] = solver.name
    latex_dict["instanceSetTrain"] = config_scenario.instance_set.name
    latex_dict["sparkleVersion"] = about.version
    latex_dict["numInstanceInTrainingInstanceSet"] = len(instance_names)

    latex_dict["numConfiguratorRuns"] = config_scenario.number_of_runs
    if hasattr(config_scenario, "wallclock_time"):
        latex_dict["wholeTimeBudget"] = config_scenario.wallclock_time
    else:
        latex_dict["wholeTimeBudget"] = config_scenario.max_time
    latex_dict["eachRunCutoffTime"] = config_scenario.cutoff_time
    latex_dict["optimisedConfiguration"] = stex.list_to_latex(opt_config_list)
    latex_dict["optimisedConfigurationTrainingPerformancePAR"] =\
        get_average_performance(res_conf, objective)
    latex_dict["defaultConfigurationTrainingPerformancePAR"] =\
        get_average_performance(res_default, objective)

    str_value = get_figure_configured_vs_default_on_instance_set(
        solver, config_scenario.instance_set.name, res_default, res_conf,
        target_directory, latex_dict["objectiveType"],
        float(config_scenario.cutoff_time), objective)
    latex_dict["figure-configured-vs-default-train"] = str_value

    # Retrieve timeout numbers for the training instances
    configured_timeouts_train, default_timeouts_train, overlapping_timeouts_train =\
        get_timeouts_instanceset(solver, config_scenario.instance_set, configurator,
                                 validator, config_scenario, config_scenario.cutoff_time)

    latex_dict["timeoutsTrainDefault"] = default_timeouts_train
    latex_dict["timeoutsTrainConfigured"] = configured_timeouts_train
    latex_dict["timeoutsTrainOverlap"] = overlapping_timeouts_train
    # Guard: the ablation scenario may be None when ablation was not run
    if ablation is None:
        latex_dict["ablationBool"] = "\\ablationfalse"
        latex_dict["ablationPath"] = ""
    else:
        latex_dict["ablationBool"] = get_ablation_bool(ablation)
        latex_dict["ablationPath"] = get_ablation_table(ablation)
    latex_dict["featuresBool"] = get_features_bool(
        config_scenario, solver.name, config_scenario.instance_set)

    return latex_dict


def get_dict_variable_to_value_test(target_dir: Path,
                                    solver: Solver,
                                    configurator: Configurator,
                                    validator: Validator,
                                    ablation: AblationScenario,
                                    configuration_scenario: ConfigurationScenario,
                                    test_set: InstanceSet) -> dict:
    """Return a dict matching test set specific latex variables with their values.

    Args:
        target_dir: Path to where output should go
        solver: The solver object
        configurator: Configurator for which the report is generated
        validator: Validator that provided the data set results
        ablation: The ablation scenario, if ablation was run
        configuration_scenario: The configuration scenario to report
        test_set: Instance set for testing

    Returns:
        A dictionary containing the variables and their values
    """
    _, config = configurator.get_optimal_configuration(configuration_scenario)
    res_default = validator.get_validation_results(
        solver, test_set, config="")
    res_conf = validator.get_validation_results(
        solver, test_set, config=config)
    instance_names = set([res[3] for res in res_default])
    run_cutoff_time = configuration_scenario.cutoff_time
    objective = configuration_scenario.sparkle_objective
    test_dict = {"instanceSetTest": test_set.name}
    test_dict["numInstanceInTestingInstanceSet"] = len(instance_names)
    test_dict["optimisedConfigurationTestingPerformancePAR"] =\
        get_average_performance(res_conf, objective)
    test_dict["defaultConfigurationTestingPerformancePAR"] =\
        get_average_performance(res_default, objective)

    objective_type = "RUNTIME" if objective.time else "QUALITY"
    test_dict["figure-configured-vs-default-test"] =\
        get_figure_configured_vs_default_on_instance_set(
            solver, test_set.name, res_default, res_conf, target_dir, objective_type,
            float(run_cutoff_time),
            configuration_scenario.sparkle_objective, data_type="test")

    # Retrieve timeout numbers for the testing instances
    configured_timeouts_test, default_timeouts_test, overlapping_timeouts_test =\
        get_timeouts_instanceset(solver,
                                 test_set,
                                 configurator,
                                 validator,
                                 configuration_scenario,
                                 run_cutoff_time)

    test_dict["timeoutsTestDefault"] = default_timeouts_test
    test_dict["timeoutsTestConfigured"] = configured_timeouts_test
    test_dict["timeoutsTestOverlap"] = overlapping_timeouts_test
    # Guard: the ablation scenario may be None when ablation was not run
    if ablation is None:
        test_dict["ablationBool"] = "\\ablationfalse"
        test_dict["ablationPath"] = ""
    else:
        test_dict["ablationBool"] = get_ablation_bool(ablation)
        test_dict["ablationPath"] = get_ablation_table(ablation)
    return test_dict


def generate_report_for_configuration(solver: Solver,
                                      configurator: Configurator,
                                      validator: Validator,
                                      extractor_dir: Path,
                                      target_path: Path,
                                      latex_template_path: Path,
                                      bibliography_path: Path,
                                      extractor_cuttoff: int,
                                      config_scenario: ConfigurationScenario,
                                      test_set: InstanceSet = None,
                                      ablation: AblationScenario = None) -> None:
    """Generate a report for algorithm configuration.

    Args:
        solver: Object representation of the solver
        configurator: Configurator for the report
        validator: Validator that validated the configurator
        extractor_dir: Path to the extractor used
        target_path: Where the report files will be placed
        latex_template_path: Path to the template to use for the report
        bibliography_path: The bib corresponding to the latex template
        extractor_cuttoff: Cutoff time for feature extraction
        config_scenario: The configuration scenario to report
        test_set: Instance set for testing
        ablation: The ablation scenario, if ablation was run. Defaults to None.
    """
    target_path.mkdir(parents=True, exist_ok=True)
    variables_dict = configuration_report_variables(
        target_path, solver, configurator, validator, extractor_dir, bibliography_path,
        config_scenario, extractor_cuttoff, test_set,
        ablation)
    stex.generate_report(latex_template_path,
                         "template-Sparkle-for-configuration.tex",
                         target_path,
                         "Sparkle_Report_for_Configuration",
                         variables_dict)
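

if __name__ == "__main__":
    # Minimal smoke test with hypothetical data: exercises the pure helper
    # get_timeouts without requiring a full Sparkle platform setup.
    configured = {"i1.cnf": 12.3, "i2.cnf": 600.0, "i3.cnf": 45.0}
    default = {"i1.cnf": 600.0, "i2.cnf": 600.0, "i3.cnf": 30.0}
    print(get_timeouts(configured, default, cutoff=60.0))  # -> (1, 2, 1)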