#!/usr/bin/env python3
"""Sparkle command to generate a report for an executed experiment."""

import sys
import math
import shutil
import argparse
import subprocess
from pathlib import Path
from typing import Union
import time
import json

import pandas as pd

from pylatex import NoEscape, NewPage
import pylatex as pl

from sparkle import __version__ as __sparkle_version__

from sparkle.CLI.help import global_variables as gv
from sparkle.CLI.help import resolve_object_name
from sparkle.CLI.help import logging as sl
from sparkle.CLI.help import argparse_custom as ac

from sparkle.solver import Solver
from sparkle.instance import Instance_Set
from sparkle.selector import Extractor
from sparkle.structures import PerformanceDataFrame, FeatureDataFrame
from sparkle.configurator.configurator import ConfigurationScenario
from sparkle.selector.selector import SelectionScenario
from sparkle.types import SolverStatus
from sparkle.platform import Settings

from sparkle.platform import latex
from sparkle.platform.output.configuration_output import ConfigurationOutput
from sparkle.platform.output.selection_output import SelectionOutput


MAX_DEC = 4  # Maximum number of decimals used for each reported value
MAX_COLS_PER_TABLE = 2  # Number of value columns per table, on top of the key columns
WIDE_TABLE_THRESHOLD = 4  # Column count above which a table is rotated to landscape
NUM_KEYS_PDF = 3  # Number of key columns for the PerformanceDataFrame
NUM_KEYS_FDF = 3  # Number of key columns for the FeatureDataFrame
MAX_CELL_LEN = 17  # Maximum characters per table cell before wrapping


def parser_function() -> argparse.ArgumentParser:
    """Define the command line arguments."""
    parser = argparse.ArgumentParser(
        description="Generate a report for all known selection, configuration and "
        "parallel portfolio scenarios.",
        epilog="If you wish to filter for specific solvers, instance sets, etc., "
        "have a look at the command line arguments.",
    )
    # Add argument for filtering solvers
    parser.add_argument(
        *ac.SolversReportArgument.names, **ac.SolversReportArgument.kwargs
    )
    # Add argument for filtering instance sets
    parser.add_argument(
        *ac.InstanceSetsReportArgument.names, **ac.InstanceSetsReportArgument.kwargs
    )

    # Add argument for filtering the appendix
    parser.add_argument(
        *Settings.OPTION_appendices.args, **Settings.OPTION_appendices.kwargs
    )

    # TODO: Add arguments for filtering configurators, selectors,
    # scenario ids and configuration ids?
    parser.add_argument(*ac.GenerateJSONArgument.names, **ac.GenerateJSONArgument.kwargs)
    return parser
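
# A minimal sketch of how this parser is typically used (the flag spellings below
# are hypothetical; the actual names are defined in sparkle.CLI.help.argparse_custom):
#
#     parser = parser_function()
#     args = parser.parse_args(["--solvers", "MySolver", "--instance-sets", "MySet"])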


def generate_configuration_section(
    report: pl.Document,
    scenario: ConfigurationScenario,
    scenario_output: ConfigurationOutput,
) -> None:
    """Generate a section for a configuration scenario."""
    report_dir = Path(report.default_filepath).parent
    time_stamp = time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))
    plot_dir = (
        report_dir
        / f"{scenario.configurator.__name__}_{scenario.name}_plots_{time_stamp}"
    )
    plot_dir.mkdir(exist_ok=True)

    # 1. Write section intro
    report.append(
        pl.Section(
            f"{scenario.configurator.__name__} Configuration: "
            f"{scenario.solver.name} on {scenario.instance_set.name}"
        )
    )
    report.append("In this scenario, ")
    report.append(
        pl.UnsafeCommand(
            f"textbf{{{scenario.configurator.__name__}}} "
            f"({scenario.configurator.full_name})~\\cite"
            f"{{{scenario.configurator.__name__}}} with version "
            f"{scenario.configurator.version} was used for configuration. "
        )
    )
    report.append(
        f"The Solver {scenario.solver} was optimised on training set "
        f"{scenario.instance_set}. The scenario was run {scenario.number_of_runs} "
        f"times independently with different seeds, yielding {scenario.number_of_runs} "
        f"configurations. The cutoff time for the solver was set to "
        f"{scenario.solver_cutoff_time} seconds. The optimised objective is "
        f"{scenario.sparkle_objectives[0]}. Each configuration was evaluated on the "
        "training set to determine the best configuration, i.e. the one with the best "
        f"{scenario.sparkle_objectives[0]} value on the training set."
    )

    # 2. Report all the configurator settings in table format
    report.append(pl.Subsection("Configurator Settings"))
    report.append(
        f"The following settings were used for {scenario.configurator.__name__}:\n"
    )
    tabular = pl.Tabular("l|r")
    tabular.add_row("Setting", "Value")
    tabular.add_hline()
    for setting, value in scenario.serialise().items():
        # Keep only the last path segment for paths, otherwise the table
        # gets too wide and obscures the other values
        stripped_value = str(value).strip().replace("\\", "/")
        segments = [segment for segment in stripped_value.split("/") if segment]
        if segments:  # Guard against empty values to avoid an IndexError
            tabular.add_row([setting, segments[-1]])
        else:
            tabular.add_row([setting, "None"])
    table_conf_settings = pl.Table(position="h")
    table_conf_settings.append(pl.UnsafeCommand("centering"))
    table_conf_settings.append(tabular)
    table_conf_settings.add_caption("Configurator Settings")
    report.append(table_conf_settings)

    # 3. Report details on the solver and instance sets used
    report.append(pl.Subsection("Solver & Instance Set(s) Details"))
    cs = scenario_output.solver.get_configuration_space()
    report.append(
        f"The solver {scenario_output.solver} was configured using "
        f"{len(cs.values())} configurable (hyper)parameters. "
        f"The configuration space has {len(cs.conditions)} conditions. "
    )
    report.append("The following instance sets were used for the scenario:")
    with report.create(pl.Itemize()) as instance_set_latex_list:
        for instance_set in [
            scenario_output.instance_set_train
        ] + scenario_output.test_instance_sets:
            instance_set_name = instance_set.name.replace("_", " ")  # LaTeX fix
            instance_set_latex_list.add_item(
                pl.UnsafeCommand(
                    f"textbf{{{instance_set_name}}} ({instance_set.size} instances)"
                )
            )

    # Helper to generate a results summary of default vs best on an instance set
    def instance_set_summary(instance_set_name: str) -> None:
        """Generate a results summary of default vs best on an instance set."""
        instance_set_results = scenario_output.instance_set_results[instance_set_name]
        report.append(
            f"The {scenario.sparkle_objectives[0]} value of the Default "
            f"Configuration on {instance_set_name} was "
        )
        report.append(
            pl.UnsafeCommand(
                f"textbf{{{round(instance_set_results.default_performance, MAX_DEC)}}}.\n"
            )
        )
        report.append(
            f"The {scenario.sparkle_objectives[0]} value of the Best "
            f"Configuration on {instance_set_name} was "
        )
        report.append(
            pl.UnsafeCommand(
                f"textbf{{{round(instance_set_results.best_performance, MAX_DEC)}}}.\n"
            )
        )
        report.append("In ")
        report.append(latex.AutoRef(f"fig:bestvsdefault{instance_set_name}{time_stamp}"))
        report.append(pl.utils.bold(" "))  # Trick to force a white space
        report.append("the results are plotted per instance.")

        # Create a graph comparing the best configuration against the default
        # on the instance set
        df = pd.DataFrame(
            [
                instance_set_results.default_instance_performance,
                instance_set_results.best_instance_performance,
            ],
            index=["Default Configuration", "Best Configuration"],
            dtype=float,
        ).T
        plot = latex.comparison_plot(df, None)
        plot_path = (
            plot_dir / f"{scenario_output.best_configuration_key}_vs_"
            f"Default_{instance_set_name}.pdf"
        )
        plot.write_image(plot_path, width=500, height=500)
        with report.create(pl.Figure(position="h")) as figure:
            figure.add_image(
                str(plot_path.relative_to(report_dir)),
                width=pl.utils.NoEscape(r"0.6\textwidth"),
            )
            figure.add_caption(
                f"Best vs Default Performance on {instance_set_name} "
                f"({scenario.sparkle_objectives[0]})"
            )
            figure.append(
                pl.UnsafeCommand(
                    r"label{"
                    f"fig:bestvsdefault{instance_set_name}{time_stamp}"
                    r"}"
                )
            )
        if scenario.sparkle_objectives[0].time:  # Write status table
            report.append("The following Solver statuses were found per instance:")
            tabular = pl.Tabular("l|c|c|c")
            tabular.add_row("Status", "Default", "Best", "Overlap")
            tabular.add_hline()
            # Count the statuses
            for status in SolverStatus:
                default_count, best_count, overlap_count = 0, 0, 0
                for instance in instance_set_results.instance_status_default.keys():
                    instance = str(instance)
                    default_hit = (
                        instance_set_results.instance_status_default[instance] == status
                    )
                    best_hit = (
                        instance_set_results.instance_status_best[instance] == status
                    )
                    default_count += default_hit
                    best_count += best_hit
                    overlap_count += default_hit and best_hit
                if default_count or best_count:
                    tabular.add_row(status, default_count, best_count, overlap_count)
            table_status_values = pl.Table(position="h")
            table_status_values.append(pl.UnsafeCommand("centering"))
            table_status_values.append(tabular)
            table_status_values.add_caption(
                "Status count for the best and default configuration."
            )
            report.append(table_status_values)

    # 4. Compare the best configuration against the default on the training set
    report.append(
        pl.Subsection(
            f"Comparison of Default and Best Configuration on Training Set "
            f"{scenario_output.instance_set_train.name}"
        )
    )
    instance_set_summary(scenario_output.instance_set_train.name)

    # 5. Report the actual configuration values
    report.append(pl.Subsubsection("Best Configuration Values"))
    if (
        scenario_output.best_configuration_key
        == PerformanceDataFrame.default_configuration
    ):
        report.append(
            "The configurator failed to find a better configuration than the "
            "default configuration on the training set in this scenario."
        )
    else:
        report.append(
            "The following parameter values "
            "were found to be the best on the training set:\n"
        )
        tabular = pl.Tabular("l|r")
        tabular.add_row("Parameter", "Value")
        tabular.add_hline()
        for parameter, value in scenario_output.best_configuration.items():
            tabular.add_row([parameter, str(value)])
        table_best_values = pl.Table(position="h")
        table_best_values.append(pl.UnsafeCommand("centering"))
        table_best_values.append(tabular)
        table_best_values.add_caption("Best found configuration values")
        report.append(table_best_values)

    # 6. Compare the best configuration against the default on the test sets
    for test_set in scenario_output.test_instance_sets:
        report.append(
            pl.Subsection(
                f"Comparison of Default and Best Configuration on Test Set "
                f"{test_set.name}"
            )
        )
        instance_set_summary(test_set.name)

    # 7. Report the parameter ablation scenario if present
    if scenario.ablation_scenario:
        report.append(pl.Subsection("Parameter importance via Ablation"))
        report.append("Ablation analysis ")
        report.append(pl.UnsafeCommand(r"cite{FawcettHoos16} "))
        # Fall back to the training set if no test set was given
        test_set = (
            scenario.ablation_scenario.test_set
            or scenario.ablation_scenario.train_set
        )
        report.append(
            f"is performed from the default configuration of {scenario.solver} to the "
            f"best found configuration ({scenario_output.best_configuration_key}) "
            "to see which parameter changes between them contribute most to the improved"
            " performance. The ablation path uses the training set "
            f"{scenario.ablation_scenario.train_set.name} and validation is performed "
            f"on the test set {test_set.name}. The set of parameters that differ between "
            "the two configurations forms the ablation path. Starting from the default "
            "configuration, the path is computed by performing a sequence of rounds. In"
            " a round, each available parameter is flipped in the configuration and "
            "validated on its performance. The flipped parameter with the best "
            "performance in that round is added to the configuration, and the next round"
            " starts with the remaining parameters. This repeats until all parameters "
            "have been flipped, at which point the configuration equals the best found "
            "configuration. The analysis resulted in the ablation presented in "
        )
        report.append(latex.AutoRef("tab:ablationtable"))
        report.append(".")

        # Add the ablation table
        tabular = pl.Tabular("r|l|r|r|r")
        data = scenario.ablation_scenario.read_ablation_table()
        for index, row in enumerate(data):
            tabular.add_row(*row)
            if index == 0:  # Horizontal line below the header row
                tabular.add_hline()
        table_ablation = pl.Table(position="h")
        table_ablation.append(pl.UnsafeCommand("centering"))
        table_ablation.append(tabular)
        table_ablation.add_caption("Ablation table")
        table_ablation.append(pl.UnsafeCommand(r"label{tab:ablationtable}"))
        report.append(table_ablation)


def generate_selection_section(
    report: pl.Document, scenario: SelectionScenario, scenario_output: SelectionOutput
) -> None:
    """Generate a section for a selection scenario."""
    report_dir = Path(report.default_filepath).parent
    time_stamp = time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))
    plot_dir = report_dir / f"{scenario.name.replace(' ', '_')}_plots_{time_stamp}"
    plot_dir.mkdir(exist_ok=True)
    report.append(
        pl.Section(
            f"Selection: {scenario.selector.model_class.__name__} on "
            f"{' '.join([s[0] for s in scenario_output.training_instance_sets])}"
        )
    )
    report.append(
        f"In this scenario, a {scenario.selector.model_class.__name__} "
        f"({scenario.selector.selector_class.__name__}) was trained on the "
        "performance and feature data using ASF-lib. The following solvers "
        f"were run with a cutoff time of {scenario.solver_cutoff} seconds:"
    )
    with report.create(pl.Itemize()) as solver_latex_list:
        # Iterate over items so the (LaTeX-friendly) display name does not
        # break the dictionary lookup for the configuration count
        for solver_name, configurations in scenario_output.solvers.items():
            display_name = solver_name.replace("_", " ")  # LaTeX fix
            solver_latex_list.add_item(
                pl.UnsafeCommand(
                    f"textbf{{{display_name}}} "
                    f"({len(configurations)} configurations)"
                )
            )
    # Report training instance sets
    report.append("The following training instance sets were used:")
    with report.create(pl.Itemize()) as instance_set_latex_list:
        for training_set_name, set_size in scenario_output.training_instance_sets:
            training_set_name = training_set_name.replace("_", " ")  # LaTeX fix
            instance_set_latex_list.add_item(
                pl.UnsafeCommand(f"textbf{{{training_set_name}}} ({set_size} instances)")
            )
    # Report feature extractors
    report.append(
        "The following feature extractors were used with an extractor cutoff "
        f"time of {scenario.extractor_cutoff} seconds:"
    )
    with report.create(pl.Itemize()) as feature_extractor_latex_list:
        for feature_extractor_name in scenario.feature_extractors:
            extractor = resolve_object_name(
                feature_extractor_name,
                gv.file_storage_data_mapping[gv.extractor_nickname_list_path],
                gv.settings().DEFAULT_extractor_dir,
                class_name=Extractor,
            )
            feature_extractor_name = feature_extractor_name.replace("_", " ")  # LaTeX
            feature_extractor_latex_list.add_item(
                pl.UnsafeCommand(
                    f"textbf{{{feature_extractor_name}}} "
                    f"({extractor.output_dimension} features)"
                )
            )
    # Report training results
    report.append(pl.Subsection("Training Results"))
    # 1. Report VBS and selector performance, create ranking list of the solvers
    # TODO: Add a reference here to the training sets section?
    report.append(
        f"This section reports the {scenario.objective.name} results of the "
        "portfolio selector on the training instance set(s) listed above. "
    )
    report.append(
        f"The {scenario.objective.name} value for the Virtual Best Solver "
        "(VBS), i.e., the perfect portfolio selector, is "
    )
    report.append(pl.utils.bold(f"{round(scenario_output.vbs_performance, MAX_DEC)}"))
    report.append(", the actual portfolio selector performance is ")
    report.append(
        pl.utils.bold(f"{round(scenario_output.actual_performance, MAX_DEC)}.\n")
    )

    report.append(
        f"Below, the solvers are ranked based on {scenario.objective.name} performance:"
    )
    with report.create(pl.Enumerate()) as ranking_list:
        for solver_name, conf_id, value in scenario_output.solver_performance_ranking:
            value = round(value, MAX_DEC)
            solver_name = solver_name.replace("_", " ")  # LaTeX fix
            conf_id = conf_id.replace("_", " ")  # LaTeX fix
            ranking_list.add_item(
                pl.UnsafeCommand(f"textbf{{{solver_name}}} ({conf_id}): {value}")
            )

    # 2. Marginal contribution ranking list for the VBS
    report.append(pl.Subsubsection("Marginal Contribution Ranking List"))
    report.append(
        "The following list shows the marginal contribution ranking list for the VBS:"
    )
    with report.create(pl.Enumerate()) as ranking_list:
        for (
            solver_name,
            conf_id,
            contribution,
            performance,
        ) in scenario_output.marginal_contribution_perfect:
            contribution, performance = (
                round(contribution, MAX_DEC),
                round(performance, MAX_DEC),
            )
            solver_name = solver_name.replace("_", " ")  # LaTeX fix
            conf_id = conf_id.replace("_", " ")  # LaTeX fix
            ranking_list.add_item(
                pl.UnsafeCommand(
                    f"textbf{{{solver_name}}} ({conf_id}): {contribution} ({performance})"
                )
            )

    # 3. Marginal contribution ranking list for the actual selector
    report.append(
        "The following list shows the marginal contribution ranking list for "
        "the actual portfolio selector:"
    )
    with report.create(pl.Enumerate()) as ranking_list:
        for (
            solver_name,
            conf_id,
            contribution,
            performance,
        ) in scenario_output.marginal_contribution_actual:
            contribution, performance = (
                round(contribution, MAX_DEC),
                round(performance, MAX_DEC),
            )
            solver_name = solver_name.replace("_", " ")  # LaTeX fix
            conf_id = conf_id.replace("_", " ")  # LaTeX fix
            ranking_list.add_item(
                pl.UnsafeCommand(
                    f"textbf{{{solver_name}}} ({conf_id}): {contribution} ({performance})"
                )
            )

    # 4. Create scatter plot analysis
    report.append(pl.Subsubsection("Scatter Plot Analysis"))
    report.append(latex.AutoRef(f"fig:sbsvsselector{time_stamp}"))
    report.append(pl.utils.bold(" "))  # Trick to force a white space
    report.append(
        "shows the empirical comparison between the portfolio "
        "selector and the single best solver (SBS). "
    )
    report.append(latex.AutoRef("fig:vbsvsselector"))
    report.append(pl.utils.bold(" "))  # Trick to force a white space
    report.append(
        "shows the empirical comparison between the actual portfolio selector "
        "and the virtual best solver (VBS)."
    )
    # Create figure on the SBS versus the selector
    sbs_name, sbs_config, _ = scenario_output.solver_performance_ranking[0]
    # sbs_plot_name = f"{Path(sbs_name).name} ({sbs_config})"
    sbs_performance = scenario_output.sbs_performance
    selector_performance = scenario_output.actual_performance_data

    # Join the data together
    df = pd.DataFrame(
        [sbs_performance, selector_performance],
        index=[f"{Path(sbs_name).name} ({sbs_config})", "Selector"],
        dtype=float,
    ).T
    plot = latex.comparison_plot(df, "Single Best Solver vs Selector")
    plot_path = (
        plot_dir / f"{Path(sbs_name).name}_{sbs_config}_vs_"
        f"Selector_{scenario.selector.model_class.__name__}.pdf"
    )
    plot.write_image(plot_path, width=500, height=500)
    with report.create(pl.Figure()) as figure:
        figure.add_image(
            str(plot_path.relative_to(report_dir)),
            width=pl.utils.NoEscape(r"0.6\textwidth"),
        )
        figure.add_caption(
            "Empirical comparison between the Single Best Solver and the Selector"
        )
        figure.append(
            pl.UnsafeCommand(r"label{fig:sbsvsselector" + str(time_stamp) + r"}")
        )

    # Comparison between the actual portfolio selector in Sparkle and the VBS
    vbs_performance = scenario_output.vbs_performance_data.tolist()
    df = pd.DataFrame(
        [vbs_performance, selector_performance],
        index=["Virtual Best Solver", "Selector"],
        dtype=float,
    ).T
    plot = latex.comparison_plot(df, "Virtual Best Solver vs Selector")
    plot_path = (
        plot_dir
        / f"Virtual_Best_Solver_vs_Selector_{scenario.selector.model_class.__name__}.pdf"
    )
    plot.write_image(plot_path, width=500, height=500)
    with report.create(pl.Figure()) as figure:
        figure.add_image(
            str(plot_path.relative_to(report_dir)),
            width=pl.utils.NoEscape(r"0.6\textwidth"),
        )
        figure.add_caption(
            "Empirical comparison between the Virtual Best Solver and the Selector"
        )
        figure.append(pl.UnsafeCommand(r"label{fig:vbsvsselector}"))

    if scenario_output.test_sets:
        report.append(pl.Subsection("Test Results"))
        report.append("The following results are reported on the test set(s):")
        with report.create(pl.Itemize()) as latex_list:
            for test_set_name, test_set_size in scenario_output.test_sets:
                result = round(
                    scenario_output.test_set_performance[test_set_name], MAX_DEC
                )
                latex_list.add_item(
                    pl.UnsafeCommand(
                        f"textbf{{{test_set_name}}} ({test_set_size} instances): {result}"
                    )
                )


def generate_parallel_portfolio_section(
    report: pl.Document, scenario: PerformanceDataFrame
) -> None:
    """Generate a section for a parallel portfolio scenario."""
    report_dir = Path(report.default_filepath).parent
    portfolio_name = scenario.csv_filepath.parent.name
    time_stamp = time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))
    plot_dir = report_dir / f"{portfolio_name.replace(' ', '_')}_plots_{time_stamp}"
    plot_dir.mkdir(exist_ok=True)
    report.append(pl.Section(f"Parallel Portfolio {portfolio_name}"))
    report.append(
        "In this scenario, Sparkle runs the portfolio of Solvers on each instance in "
        "parallel with "
        f"{gv.settings().parallel_portfolio_num_seeds_per_solver} different "
        "seeds. The cutoff time for each solver run is set to "
        f"{gv.settings().solver_cutoff_time} seconds."
    )
    report.append(pl.Subsection("Solvers & Instance Sets"))
    report.append("The following Solvers were used in the portfolio:")
    # 1. Report on the Solvers and Instance Sets used for the portfolio
    with report.create(pl.Itemize()) as solver_latex_list:
        configs = scenario.configurations
        for solver in scenario.solvers:
            solver_name = solver.replace("_", " ")  # LaTeX fix
            solver_latex_list.add_item(
                pl.UnsafeCommand(
                    f"textbf{{{solver_name}}} ({len(configs[solver])} configurations)"
                )
            )
    report.append("The following Instance Sets were used in the portfolio:")
    instance_sets = set(Path(instance).parent.name for instance in scenario.instances)
    instance_set_count = [
        len([i for i in scenario.instances if Path(i).parent.name == s])
        for s in instance_sets
    ]
    with report.create(pl.Itemize()) as instance_set_latex_list:
        for set_name, set_size in zip(instance_sets, instance_set_count):
            set_name = set_name.replace("_", " ")  # LaTeX fix
            instance_set_latex_list.add_item(
                pl.UnsafeCommand(f"textbf{{{set_name}}} ({set_size} instances)")
            )
    # 2. List which solver was the best on how many instances
    report.append(pl.Subsection("Portfolio Performance"))
    objective = scenario.objectives[0]
    report.append(
        f"The objective for the portfolio is {objective}. The "
        "following performance of the solvers was found over the instances: "
    )
    best_solver_count = {solver: 0 for solver in scenario.solvers}
    for instance in scenario.instances:
        ranking = scenario.get_solver_ranking(objective=objective, instances=[instance])
        best_solver_count[ranking[0][0]] += 1

    with report.create(pl.Itemize()) as latex_list:
        for solver, count in best_solver_count.items():
            solver_name = solver.replace("_", " ")  # LaTeX fix
            latex_list.add_item(
                pl.UnsafeCommand(
                    f"textbf{{{solver_name}}} was the best solver on {count} instance(s)."
                )
            )
    # TODO: Report how many instances remained unsolved

    # 3. Create a table showing the performance of the portfolio and all solvers,
    # by showing the status counts and the number of times each solver was best
    solver_cancelled_count = {solver: 0 for solver in scenario.solvers}
    solver_timeout_count = {solver: 0 for solver in scenario.solvers}
    status_objective = [
        o for o in scenario.objective_names if o.lower().startswith("status")
    ][0]
    cancelled_status = [
        SolverStatus.UNKNOWN,
        SolverStatus.CRASHED,
        SolverStatus.WRONG,
        SolverStatus.ERROR,
        SolverStatus.KILLED,
    ]
    for solver in scenario.solvers:
        statuses = scenario.get_value(solver=solver, objective=status_objective)
        for status in statuses:
            status = SolverStatus(status)
            if status in cancelled_status:
                solver_cancelled_count[solver] += 1
            elif status == SolverStatus.TIMEOUT:
                solver_timeout_count[solver] += 1

    report.append(latex.AutoRef("tab:parallelportfoliotable"))
    report.append(pl.utils.bold(" "))  # Trick to force a white space
    report.append("shows the performance of the portfolio and its solvers.")
    tabular = pl.Tabular("r|rrrr")
    tabular.add_row(["Solver", objective, "# Timeouts", "# Cancelled", "# Best"])
    tabular.add_hline()
    solver_performance = {
        solver: round(performance, MAX_DEC)
        for solver, _, performance in scenario.get_solver_ranking(objective=objective)
    }
    for solver in scenario.solvers:
        tabular.add_row(
            solver,
            solver_performance[solver],
            solver_timeout_count[solver],
            solver_cancelled_count[solver],
            best_solver_count[solver],
        )
    tabular.add_hline()
    portfolio_performance = round(
        scenario.best_performance(objective=objective), MAX_DEC
    )
    tabular.add_row(
        portfolio_name,
        portfolio_performance,
        sum(solver_timeout_count.values()),
        sum(solver_cancelled_count.values()),
        sum(best_solver_count.values()),
    )
    table_portfolio = pl.Table(position="h")
    table_portfolio.append(pl.UnsafeCommand("centering"))
    table_portfolio.append(tabular)
    table_portfolio.add_caption("Parallel Portfolio Performance")
    table_portfolio.append(pl.UnsafeCommand(r"label{tab:parallelportfoliotable}"))
    report.append(table_portfolio)
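
    # The resulting table has roughly this shape (hypothetical values):
    #   Solver     PAR10    # Timeouts  # Cancelled  # Best
    #   ---------------------------------------------------
    #   SolverA    123.4    2           0            10
    #   SolverB    234.5    5           1            3
    #   ---------------------------------------------------
    #   Portfolio  98.7     7           1            13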

    # 4. Create a scatter plot comparing the portfolio and the single best solver
    sbs_name = scenario.get_solver_ranking(objective=objective)[0][0]
    sbs_instance_performance = scenario.get_value(
        solver=sbs_name, objective=objective.name
    )
    sbs_name = Path(sbs_name).name
    report.append(latex.AutoRef("fig:portfoliovssbs"))
    report.append(pl.utils.bold(" "))  # Trick to force a white space
    report.append(
        "shows the empirical comparison between the portfolio and the single "
        f"best solver (SBS) {sbs_name}."
    )
    portfolio_instance_performance = scenario.best_instance_performance(
        objective=objective.name
    ).tolist()

    df = pd.DataFrame(
        [sbs_instance_performance, portfolio_instance_performance],
        index=[f"SBS ({sbs_name}) Performance", "Portfolio Performance"],
        dtype=float,
    ).T
    plot = latex.comparison_plot(df, None)
    plot_path = plot_dir / f"sbs_{sbs_name}_vs_parallel_portfolio.pdf"
    plot.write_image(plot_path, width=500, height=500)
    with report.create(pl.Figure(position="h")) as figure:
        figure.add_image(
            str(plot_path.relative_to(report_dir)),
            width=pl.utils.NoEscape(r"0.6\textwidth"),
        )
        figure.add_caption(f"Portfolio vs SBS Performance ({objective})")
        figure.append(pl.UnsafeCommand(r"label{fig:portfoliovssbs}"))


def append_dataframe_longtable(
    report: pl.Document,
    df: pd.DataFrame,
    caption: str,
    label: str,
    max_cols: int = MAX_COLS_PER_TABLE,
    wide_threshold: int = WIDE_TABLE_THRESHOLD,
    num_keys: int = NUM_KEYS_PDF,
) -> None:
    """Appends a pandas DataFrame to a PyLaTeX document as one or more LaTeX longtables.

    Args:
        report: The PyLaTeX document to which the table(s) will be appended.
        df: The DataFrame to be rendered as LaTeX longtable(s).
        caption: The caption for the table(s).
        label: The LaTeX label for referencing the table(s).
        max_cols: Maximum number of value columns per table chunk.
            Defaults to MAX_COLS_PER_TABLE.
        wide_threshold: Number of columns above which the table is rotated
            to landscape. Defaults to WIDE_TABLE_THRESHOLD.
        num_keys: Number of key columns to include in each table chunk.
            Defaults to NUM_KEYS_PDF.

    Returns:
        None
    """

    def latex_escape_text(string: str) -> str:
        """Escape special LaTeX characters in a string."""
        # Escape backslashes first via a brace-free placeholder, so that the
        # brace replacements below do not mangle the \textbackslash{} macro
        string = string.replace("\\", "\x00")
        string = (
            string.replace("&", r"\&")
            .replace("%", r"\%")
            .replace("$", r"\$")
            .replace("#", r"\#")
            .replace("_", r"\_")
            .replace("{", r"\{")
            .replace("}", r"\}")
            .replace("~", r"\textasciitilde{}")
            .replace("^", r"\textasciicircum{}")
        )
        return string.replace("\x00", r"\textbackslash{}")
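
    # A minimal sketch of the intended behaviour (hypothetical inputs):
    #   >>> latex_escape_text("50%_done")
    #   '50\\%\\_done'
    #   >>> latex_escape_text("a\\b")
    #   'a\\textbackslash{}b'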

    def last_path_segment(text: str) -> str:
        """Keep only the last non-empty path-like segment.

        Handles both back- and forward slashes and ignores any
        leading or trailing slashes.
        """
        stripped_text = str(text).strip().replace("\\", "/")
        segments = [
            segment for segment in stripped_text.split("/") if segment
        ]  # Ignore empty segments
        return segments[-1] if segments else ""
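
    # For example (hypothetical inputs):
    #   >>> last_path_segment("C:\\Users\\sparkle\\solver.py")
    #   'solver.py'
    #   >>> last_path_segment("/a/b/c/")
    #   'c'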

    def wrap_fixed_shortstack(cell: str, width: int = MAX_CELL_LEN) -> str:
        """Wrap long text to a fixed width for LaTeX tables."""
        string_cell = last_path_segment(cell)
        if len(string_cell) <= width:
            return latex_escape_text(string_cell)
        chunks = [
            latex_escape_text(string_cell[index : index + width])
            for index in range(0, len(string_cell), width)
        ]
        # A left-aligned shortstack forces line breaks and grows the row height
        return r"\shortstack[l]{" + r"\\ ".join(chunks) + "}"

    def wrap_header_labels(
        df: pd.DataFrame, width_per_cell: int = MAX_CELL_LEN
    ) -> pd.DataFrame:
        """Wrap long header labels to a fixed width for LaTeX tables."""
        df_copy = df.copy()
        if isinstance(df_copy.columns, pd.MultiIndex):
            new_cols = []
            for column in df_copy.columns:
                new_cols.append(
                    tuple(
                        wrap_fixed_shortstack(last_path_segment(index), width_per_cell)
                        if isinstance(index, str)
                        else index
                        for index in column
                    )
                )
            names = [
                (
                    wrap_fixed_shortstack(last_path_segment(name), width_per_cell)
                    if isinstance(name, str)
                    else name
                )
                for name in (df_copy.columns.names or [])
            ]
            df_copy.columns = pd.MultiIndex.from_tuples(new_cols, names=names)
        else:
            df_copy.columns = [
                wrap_fixed_shortstack(last_path_segment(column), width_per_cell)
                if isinstance(column, str)
                else column
                for column in df_copy.columns
            ]
        return df_copy

    def format_cell(cell: Union[int, float, str]) -> str:
        """Format a cell for printing in a LaTeX table."""
        try:
            float_cell = float(cell)
        except (TypeError, ValueError):
            return wrap_fixed_shortstack(last_path_segment(str(cell)), MAX_CELL_LEN)

        if not math.isfinite(float_cell):  # NaN and +/- infinity
            return "NaN"

        if float_cell.is_integer():
            return str(int(float_cell))
        # Round to MAX_DEC decimals, then strip trailing zeros
        return f"{round(float_cell, MAX_DEC):.{MAX_DEC}f}".rstrip("0").rstrip(".")
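
    # For example (hypothetical values):
    #   >>> format_cell(3.0)
    #   '3'
    #   >>> format_cell(0.123456)
    #   '0.1235'
    #   >>> format_cell("Solvers/MySolver")
    #   'MySolver'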

    df_copy = df.copy()

    # In order to be able to show the key columns, we need to reset the index
    if not isinstance(df_copy.index, pd.RangeIndex) and df_copy.index.name in (
        None,
        "index",
        "",
    ):
        df_copy = df_copy.reset_index()

    # Remove the Seed column from the performance dataframe since it is not
    # very informative and clutters the table
    if isinstance(df, PerformanceDataFrame):
        mask = df_copy.columns.get_level_values("Meta") == "Seed"
        df_copy = df_copy.loc[:, ~mask]

    # For the performance dataframe, we want to show the values of the objectives
    # with their corresponding instance and run. Since objective, instance and run
    # are indexes in the performance dataframe, we reset the index to get them as
    # columns. We name them key columns, since they are the key to identify the
    # value of the objective for a given instance and run.
    # (Respectively FeatureGroup, FeatureName, Extractor in the feature dataframe)
    keys = df_copy.iloc[:, :num_keys]  # Key columns

    # Split the dataframe into chunks of max_cols value columns per page
    number_column_chunks = max(
        math.ceil((df_copy.shape[1] - num_keys) / max_cols), 1
    )
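    # For example, with num_keys=3 and max_cols=2, a dataframe with 7 columns
    # has 4 value columns and is split into ceil(4 / 2) = 2 chunks, each
    # repeating the 3 key columns next to (at most) 2 value columns.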
    for i in range(number_column_chunks):
        start_col = i * max_cols
        end_col = (i + 1) * max_cols

        # Select the value columns for this chunk
        values = df_copy.iloc[
            :,
            start_col + num_keys : end_col + num_keys,
        ]

        # Concatenate the key and value columns
        full_part = pd.concat([keys, values], axis=1)

        # If there are no value columns left, we are done
        # (checked before appending a NewPage to avoid a stray blank page)
        if full_part.shape[1] <= num_keys:
            break
        report.append(NewPage())

        full_part_wrapped = wrap_header_labels(full_part, MAX_CELL_LEN)

        # Tell pandas how to print numbers
        formatters = {col: format_cell for col in full_part_wrapped.columns}

        tex = full_part_wrapped.to_latex(
            longtable=True,
            index=False,
            escape=False,  # We want to split the long words, not escape them
            caption=caption + (f" (part {i + 1})" if number_column_chunks > 1 else ""),
            label=(label + f"-p{i + 1}") if number_column_chunks > 1 else label,
            float_format=None,
            multicolumn=True,
            multicolumn_format="c",
            multirow=False,
            column_format="c" * full_part_wrapped.shape[1],
            formatters=formatters,
        )

        # Centre the whole table horizontally
        centred_tex = "\\begin{center}\n" + tex + "\\end{center}\n"

        # Rotate if the table is still too wide
        if full_part_wrapped.shape[1] > wide_threshold:
            report.append(NoEscape(r"\begin{landscape}"))
            report.append(NoEscape(centred_tex))
            report.append(NoEscape(r"\end{landscape}"))
        else:
            report.append(NoEscape(centred_tex))


def generate_appendix(
    report: pl.Document,
    performance_data: PerformanceDataFrame,
    feature_data: FeatureDataFrame,
) -> None:
    """Add an appendix with the raw performance and feature data to the report.

    Args:
        report: The LaTeX document object to which the appendix will be added.
        performance_data: The performance data to be included in the appendix.
        feature_data: The feature data to be included in the appendix.

    Returns:
        None
    """
    report.packages.append(pl.Package("pdflscape"))  # Landscape pages
    report.packages.append(pl.Package("longtable"))  # Long tables
    report.packages.append(pl.Package("booktabs"))  # Better table formatting
    report.append(pl.NewPage())
    report.append(pl.NoEscape(r"\clearpage"))
    report.append(pl.UnsafeCommand("appendix"))
    report.append(pl.Section("Performance DataFrame"))

    append_dataframe_longtable(
        report,
        performance_data,
        caption="Performance DataFrame",
        label="tab:perf_data",
        max_cols=MAX_COLS_PER_TABLE,
        wide_threshold=WIDE_TABLE_THRESHOLD,
        num_keys=NUM_KEYS_PDF,
    )

    report.append(pl.Section("Feature DataFrame"))
    append_dataframe_longtable(
        report,
        feature_data,
        caption="Feature DataFrame",
        label="tab:feature_data",
        max_cols=MAX_COLS_PER_TABLE,
        wide_threshold=WIDE_TABLE_THRESHOLD,
        num_keys=NUM_KEYS_FDF,
    )


def main(argv: list[str]) -> None:
    """Generate a report for executed experiments in the platform."""
    # Log the command call
    sl.log_command(sys.argv, gv.settings().random_state)

    # Define the command line arguments
    parser = parser_function()

    # Process the command line arguments
    args = parser.parse_args(argv)

    performance_data = PerformanceDataFrame(gv.settings().DEFAULT_performance_data_path)
    feature_data = FeatureDataFrame(gv.settings().DEFAULT_feature_data_path)

    # Fetch all known scenarios
    configuration_scenarios = gv.configuration_scenarios(refresh=True)
    selection_scenarios = gv.selection_scenarios(refresh=True)
    parallel_portfolio_scenarios = gv.parallel_portfolio_scenarios()

    # Filter scenarios based on the arguments
    if args.solvers:
        solvers = [
            resolve_object_name(
                solver,
                gv.solver_nickname_mapping,
                gv.settings().DEFAULT_solver_dir,
                Solver,
            )
            for solver in args.solvers
        ]
        configuration_scenarios = [
            scenario
            for scenario in configuration_scenarios
            if scenario.solver.directory in [solver.directory for solver in solvers]
        ]
        selection_scenarios = [
            scenario
            for scenario in selection_scenarios
            if set(scenario.solvers).intersection(
                [str(solver.directory) for solver in solvers]
            )
        ]
        parallel_portfolio_scenarios = [
            scenario
            for scenario in parallel_portfolio_scenarios
            if set(scenario.solvers).intersection(
                [str(solver.directory) for solver in solvers]
            )
        ]
    if args.instance_sets:
        instance_sets = [
            resolve_object_name(
                instance_set,
                gv.instance_set_nickname_mapping,
                gv.settings().DEFAULT_instance_dir,
                Instance_Set,
            )
            for instance_set in args.instance_sets
        ]
        configuration_scenarios = [
            scenario
            for scenario in configuration_scenarios
            if scenario.instance_set.directory
            in [instance_set.directory for instance_set in instance_sets]
        ]
        selection_scenarios = [
            scenario
            for scenario in selection_scenarios
            if set(scenario.instance_sets).intersection(
                [str(instance_set.name) for instance_set in instance_sets]
            )
        ]
        parallel_portfolio_scenarios = [
            scenario
            for scenario in parallel_portfolio_scenarios
            if set(scenario.instance_sets).intersection(
                [str(instance_set.name) for instance_set in instance_sets]
            )
        ]
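
    # For example, invoking the command with both filters (hypothetical flag
    # spellings and object names):
    #   generate_report --solvers PbO-CCSAT --instance-sets PTN
    # keeps only the scenarios involving both the given solver and instance set.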

    processed_configuration_scenarios = []
    processed_selection_scenarios = []
    possible_test_sets = [
        Instance_Set(possible_test_set)
        for possible_test_set in gv.settings().DEFAULT_instance_dir.iterdir()
    ]
    for configuration_scenario in configuration_scenarios:
        processed_configuration_scenarios.append(
            (
                ConfigurationOutput(
                    configuration_scenario, performance_data, possible_test_sets
                ),
                configuration_scenario,
            )
        )
    for selection_scenario in selection_scenarios:
        processed_selection_scenarios.append(
            (SelectionOutput(selection_scenario), selection_scenario)
        )
    if (
        not configuration_scenarios
        and not selection_scenarios
        and not parallel_portfolio_scenarios
    ):
        print("No scenarios found. Exiting.")
        sys.exit(-1)
    raw_output = gv.settings().DEFAULT_output_analysis / "JSON"
    if raw_output.exists():  # Clean up any previous output
        shutil.rmtree(raw_output)
    raw_output.mkdir()

    # Write the JSON output
    output_json = {}
    for output, configuration_scenario in processed_configuration_scenarios:
        output_json[configuration_scenario.name] = output.serialise()
    for output, selection_scenario in processed_selection_scenarios:
        output_json[selection_scenario.name] = output.serialise()
    # TODO: We do not have an output object for parallel portfolios

    raw_output_json = raw_output / "output.json"
    with raw_output_json.open("w") as f:
        json.dump(output_json, f, indent=4)

    print(f"Machine readable output written to: {raw_output_json}")

    if args.only_json:  # Done
        sys.exit(0)

    # TODO: Group scenarios based on:
    # - Configuration / Selection / Parallel Portfolio
    # - Training Instance Set / Testing Instance Set
    # - Configurators can be merged as long as we can match their budgets clearly
    report_directory = gv.settings().DEFAULT_output_analysis / "report"
    if report_directory.exists():  # Clean it
        shutil.rmtree(report_directory)
    report_directory.mkdir()
    target_path = report_directory / "report"
    report = pl.document.Document(
        default_filepath=str(target_path), document_options=["british"]
    )
    bibpath = gv.settings().bibliography_path
    newbibpath = report_directory / "report.bib"
    shutil.copy(bibpath, newbibpath)
    # BUGFIX for an unknown package load in PyLaTeX
    lastpage_package = pl.package.Package("lastpage")
    if lastpage_package in report.packages:
        report.packages.remove(lastpage_package)
    report.packages.append(
        pl.package.Package(
            "geometry",
            options=[
                "verbose",
                "tmargin=3.5cm",
                "bmargin=3.5cm",
                "lmargin=3cm",
                "rmargin=3cm",
            ],
        )
    )
    # Unsafe command for \emph{Sparkle}
    report.preamble.extend(
        [
            pl.UnsafeCommand("title", r"\emph{Sparkle} Algorithm Portfolio report"),
            pl.UnsafeCommand(
                "author",
                r"Generated by \emph{Sparkle} "
                f"(version: {__sparkle_version__})",
            ),
        ]
    )
    report.append(pl.Command("maketitle"))
    report.append(pl.Section("Introduction"))
    # TODO: Add a quick overview to the introduction on what is considered in the
    # report regarding Solvers, Instance Sets and Feature Extractors
    solver_tool = (
        "RunSolver" if gv.settings().DEFAULT_runsolver_exec.exists() else "PyRunSolver"
    )
    report.append(
        pl.UnsafeCommand(
            r"emph{Sparkle}~\cite{Hoos15} is a multi-agent problem-solving platform "
            r"based on Programming by Optimisation (PbO)~\cite{Hoos12}. It provides a "
            "number of effective algorithm optimisation techniques (such as automated "
            "algorithm configuration, portfolio-based algorithm selection, etc.) to "
            "accelerate existing solvers. All computation and memory measurements "
            f"are done by {solver_tool}."
        )
    )

    for scenario_output, scenario in processed_configuration_scenarios:
        generate_configuration_section(report, scenario, scenario_output)

    for scenario_output, scenario in processed_selection_scenarios:
        generate_selection_section(report, scenario, scenario_output)

    for parallel_dataframe in parallel_portfolio_scenarios:
        generate_parallel_portfolio_section(report, parallel_dataframe)

    # Check whether the user wants to add the appendix
    settings = gv.settings(args)
    if settings.appendices:
        generate_appendix(report, performance_data, feature_data)

    # Add the bibliography
    report.append(pl.NewPage())  # Ensure it starts on a new page
    report.append(pl.Command("bibliographystyle", arguments=["plain"]))
    report.append(pl.Command("bibliography", arguments=[str(newbibpath)]))
    # Generate the report .tex and .pdf
    report.generate_pdf(target_path, clean=False, clean_tex=False, compiler="pdflatex")
    # TODO: This should be done by PyLaTeX. Generate the bib and regenerate the report.
    # Reference for the (terrible) solution: https://tex.stackexchange.com/
    # questions/63852/question-mark-or-bold-citation-key-instead-of-citation-number

    # Run BibTeX silently
    subprocess.run(
        ["bibtex", newbibpath.with_suffix("")],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    # Run pdflatex twice more so the citations resolve to their final numbers
    report.generate_pdf(target_path, clean=False, clean_tex=False, compiler="pdflatex")
    report.generate_pdf(target_path, clean=False, clean_tex=False, compiler="pdflatex")
    print(f"Report generated at {target_path}.pdf")
    sys.exit(0)


if __name__ == "__main__":
    main(sys.argv[1:])