#!/usr/bin/env python3
"""Sparkle command to generate a report for an executed experiment."""

import sys
import shutil
import argparse
from pathlib import Path
import time
import json
import pandas as pd

from pylatex import NoEscape, NewPage
import pylatex as pl
from sparkle import __version__ as __sparkle_version__

from sparkle.CLI.help import global_variables as gv
from sparkle.CLI.help import resolve_object_name
from sparkle.CLI.help import logging as sl
from sparkle.CLI.help import argparse_custom as ac

from sparkle.solver import Solver
from sparkle.instance import Instance_Set
from sparkle.selector import Extractor
from sparkle.structures import PerformanceDataFrame, FeatureDataFrame
from sparkle.configurator.configurator import ConfigurationScenario
from sparkle.selector.selector import SelectionScenario
from sparkle.types import SolverStatus
from sparkle.platform import Settings

from sparkle.platform import latex
from sparkle.platform.output.configuration_output import ConfigurationOutput
from sparkle.platform.output.selection_output import SelectionOutput


MAX_DEC = 4  # Maximum decimals used for each reported value
MAX_COLS_PER_TABLE = 2  # Value columns per table chunk, in addition to the key columns
WIDE_TABLE_THRESHOLD = 4  # Columns above which we switch to landscape
NUM_KEYS_PDF = 3  # Key columns of the PerformanceDataFrame (Objective, Instance, Run)
NUM_KEYS_FDF = 3  # Key columns of the FeatureDataFrame (Group, Name, Extractor)
MAX_CELL_LEN = 17  # Maximum characters per table cell before wrapping
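# Illustrative example of how these constants interact: a dataframe with 3 key
# columns and 5 value columns is rendered as 3 longtable chunks of at most
# MAX_COLS_PER_TABLE value columns each; chunks with 3 + 2 = 5 total columns
# exceed WIDE_TABLE_THRESHOLD = 4 and are rotated to landscape.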


def parser_function() -> argparse.ArgumentParser:
    """Define the command line arguments."""
    parser = argparse.ArgumentParser(
        description="Generates a report for all known selection, configuration "
        "and parallel portfolio scenarios.",
        epilog="If you wish to filter for specific solvers, instance sets, etc., "
        "have a look at the command line arguments.",
    )
    # Add argument for filtering solvers
    parser.add_argument(
        *ac.SolversReportArgument.names, **ac.SolversReportArgument.kwargs
    )
    # Add argument for filtering instance sets
    parser.add_argument(
        *ac.InstanceSetsReportArgument.names, **ac.InstanceSetsReportArgument.kwargs
    )

    # Add argument for toggling the appendices
    parser.add_argument(
        *Settings.OPTION_appendices.args, **Settings.OPTION_appendices.kwargs
    )

    # Add argument for filtering configurators?
    # Add argument for filtering selectors?
    # Add argument for filtering ??? scenario ids? configuration ids?
    parser.add_argument(*ac.GenerateJSONArgument.names, **ac.GenerateJSONArgument.kwargs)
    return parser
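
# Example invocation (illustrative sketch; the exact flag spellings are defined
# by argparse_custom and Settings, so the flags below are assumptions):
#   $ python -m sparkle.CLI.generate_report --solvers PbO-CCSAT --instance-sets PTN
#   $ python -m sparkle.CLI.generate_report --only-json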


def generate_configuration_section(
    report: pl.Document,
    scenario: ConfigurationScenario,
    scenario_output: ConfigurationOutput,
) -> None:
    """Generate a section for a configuration scenario."""
    report_dir = Path(report.default_filepath).parent
    time_stamp = time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))
    plot_dir = (
        report_dir
        / f"{scenario.configurator.__name__}_{scenario.name}_plots_{time_stamp}"
    )
    plot_dir.mkdir(exist_ok=True)

    # 1. Write section intro
    report.append(
        pl.Section(
            f"{scenario.configurator.__name__} Configuration: "
            f"{scenario.solver.name} on {scenario.instance_set.name}"
        )
    )
    report.append("In this scenario, ")
    report.append(
        pl.UnsafeCommand(
            f"textbf{{{scenario.configurator.__name__}}} "
            f"({scenario.configurator.full_name})~\\cite"
            f"{{{scenario.configurator.__name__}}} with version "
            f"{scenario.configurator.version} was used for configuration. "
        )
    )
    report.append(
        f"The Solver {scenario.solver} was optimised on the training set "
        f"{scenario.instance_set}. The scenario was run {scenario.number_of_runs} "
        f"times independently with different seeds, yielding {scenario.number_of_runs} "
        f"configurations. The cutoff time for the solver was set to "
        f"{scenario.solver_cutoff_time} seconds. The optimised objective is "
        f"{scenario.sparkle_objectives[0]}. Each configuration was evaluated on the "
        "training set to determine the best configuration, i.e. the one with the best "
        f"{scenario.sparkle_objectives[0]} value on the training set."
    )

    # 2. Report all the configurator settings in table format
    report.append(pl.Subsection("Configurator Settings"))
    report.append(
        f"The following settings were used for {scenario.configurator.__name__}:\n"
    )
    tabular = pl.Tabular("l|r")
    tabular.add_row("Setting", "Value")
    tabular.add_hline()
    for setting, value in scenario.serialise().items():
        # Keep only the last path segment for path-like values, otherwise
        # the table gets too wide and we can't see the other values
        t = str(value).strip().replace("\\", "/")
        parts = [p for p in t.split("/") if p]
        if parts:  # Guard against empty values, which would crash parts[-1]
            tabular.add_row([setting, parts[-1]])
        else:
            tabular.add_row([setting, "None"])
    table_conf_settings = pl.Table(position="h")
    table_conf_settings.append(pl.UnsafeCommand("centering"))
    table_conf_settings.append(tabular)
    table_conf_settings.add_caption("Configurator Settings")
    report.append(table_conf_settings)

    # 3. Report details on the instances and solver used
    report.append(pl.Subsection("Solver & Instance Set(s) Details"))
    cs = scenario_output.solver.get_configuration_space()
    report.append(
        f"The solver {scenario_output.solver} was configured using "
        f"{len(cs.values())} configurable (hyper)parameters. "
        f"The configuration space has {len(cs.conditions)} conditions. "
    )
    report.append("The following instance sets were used for the scenario:")
    with report.create(pl.Itemize()) as instance_set_latex_list:
        for instance_set in [
            scenario_output.instance_set_train
        ] + scenario_output.test_instance_sets:
            instance_set_name = instance_set.name.replace("_", " ")  # Latex fix
            instance_set_latex_list.add_item(
                pl.UnsafeCommand(
                    f"textbf{{{instance_set_name}}} ({instance_set.size} instances)"
                )
            )

    # Function to generate a results summary of default vs best on an instance set
    def instance_set_summary(instance_set_name: str) -> None:
        """Generate a results summary of default vs best on an instance set."""
        instance_set_results = scenario_output.instance_set_results[instance_set_name]
        report.append(
            f"The {scenario.sparkle_objectives[0]} value of the Default "
            f"Configuration on {instance_set_name} was "
        )
        report.append(
            pl.UnsafeCommand(
                f"textbf{{{round(instance_set_results.default_performance, MAX_DEC)}}}.\n"
            )
        )
        report.append(
            f"The {scenario.sparkle_objectives[0]} value of the Best "
            f"Configuration on {instance_set_name} was "
        )
        report.append(
            pl.UnsafeCommand(
                f"textbf{{{round(instance_set_results.best_performance, MAX_DEC)}}}.\n"
            )
        )
        report.append("In ")
        report.append(latex.AutoRef(f"fig:bestvsdefault{instance_set_name}{time_stamp}"))
        report.append(pl.utils.bold(" "))  # Force white space
        report.append("the results are plotted per instance.")

        # Create graph to compare best configuration vs default on the instance set
        df = pd.DataFrame(
            [
                instance_set_results.default_instance_performance,
                instance_set_results.best_instance_performance,
            ],
            index=["Default Configuration", "Best Configuration"],
            dtype=float,
        ).T
        plot = latex.comparison_plot(df, None)
        plot_path = (
            plot_dir / f"{scenario_output.best_configuration_key}_vs_"
            f"Default_{instance_set_name}.pdf"
        )
        plot.write_image(plot_path, width=500, height=500)
        with report.create(pl.Figure(position="h")) as figure:
            figure.add_image(
                str(plot_path.relative_to(report_dir)),
                width=pl.utils.NoEscape(r"0.6\textwidth"),
            )
            figure.add_caption(
                f"Best vs Default Performance on {instance_set_name} "
                f"({scenario.sparkle_objectives[0]})"
            )
            figure.append(
                pl.UnsafeCommand(
                    r"label{"
                    f"fig:bestvsdefault{instance_set_name}{time_stamp}"
                    r"}"
                )
            )
        if scenario.sparkle_objectives[0].time:  # Write status table
            report.append("The following solver statuses were found per instance:")
            tabular = pl.Tabular("l|c|c|c")
            tabular.add_row("Status", "Default", "Best", "Overlap")
            tabular.add_hline()
            # Count the statuses
            for status in SolverStatus:
                default_count, best_count, overlap_count = 0, 0, 0
                for instance in instance_set_results.instance_status_default.keys():
                    instance = str(instance)
                    default_hit = (
                        instance_set_results.instance_status_default[instance] == status
                    )
                    best_hit = (
                        instance_set_results.instance_status_best[instance] == status
                    )
                    default_count += default_hit
                    best_count += best_hit
                    overlap_count += default_hit and best_hit
                if default_count or best_count:
                    tabular.add_row(status, default_count, best_count, overlap_count)
            table_status_values = pl.Table(position="h")
            table_status_values.append(pl.UnsafeCommand("centering"))
            table_status_values.append(tabular)
            table_status_values.add_caption(
                "Status count for the best and default configuration."
            )
            report.append(table_status_values)
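
            # Reading the table (illustrative example): if the default
            # configuration times out on instances A and B while the best
            # configuration only times out on B, the TIMEOUT row reads
            # Default=2, Best=1, Overlap=1.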

    # 4. Report the results of the best configuration on the training set vs the default
    report.append(
        pl.Subsection(
            f"Comparison of Default and Best Configuration on Training Set "
            f"{scenario_output.instance_set_train.name}"
        )
    )
    instance_set_summary(scenario_output.instance_set_train.name)

    # 5. Report the actual config values
    report.append(pl.Subsubsection("Best Configuration Values"))
    if (
        scenario_output.best_configuration_key
        == PerformanceDataFrame.default_configuration
    ):
        report.append(
            "The configurator failed to find a better configuration than the "
            "default configuration on the training set in this scenario."
        )
    else:
        report.append(
            "The following parameter values "
            "were found to be the best on the training set:\n"
        )
        tabular = pl.Tabular("l|r")
        tabular.add_row("Parameter", "Value")
        tabular.add_hline()
        for parameter, value in scenario_output.best_configuration.items():
            tabular.add_row([parameter, str(value)])
        table_best_values = pl.Table(position="h")
        table_best_values.append(pl.UnsafeCommand("centering"))
        table_best_values.append(tabular)
        table_best_values.add_caption("Best found configuration values")
        report.append(table_best_values)

    # 6. Report the results of best vs default configuration on the test sets
    for test_set in scenario_output.test_instance_sets:
        report.append(
            pl.Subsection(
                f"Comparison of Default and Best Configuration on Test Set "
                f"{test_set.name}"
            )
        )
        instance_set_summary(test_set.name)

    # 7. Report the parameter ablation scenario if present
    if scenario.ablation_scenario:
        report.append(pl.Subsection("Parameter importance via Ablation"))
        report.append("Ablation analysis ")
        report.append(pl.UnsafeCommand(r"cite{FawcettHoos16} "))
        test_set = scenario.ablation_scenario.test_set
        if not scenario.ablation_scenario.test_set:
            test_set = scenario.ablation_scenario.train_set
        report.append(
            f"is performed from the default configuration of {scenario.solver} to the "
            f"best found configuration ({scenario_output.best_configuration_key}) "
            "to see which parameter changes between them contribute most to the "
            "improved performance. The ablation path uses the training set "
            f"{scenario.ablation_scenario.train_set.name} and validation is performed "
            f"on the test set {test_set.name}. The set of parameters that differ "
            "between the two configurations forms the ablation path. Starting from "
            "the default configuration, the path is computed by performing a sequence "
            "of rounds. In a round, each available parameter is flipped in the "
            "configuration and validated on its performance. The flipped parameter "
            "with the best performance in that round is added to the configuration, "
            "and the next round starts with the remaining parameters. This repeats "
            "until all parameters have been flipped, at which point the configuration "
            "equals the best found configuration. The analysis resulted in the "
            "ablation presented in "
        )
        report.append(latex.AutoRef("tab:ablationtable"))
        report.append(".")

        # Add ablation table
        tabular = pl.Tabular("r|l|r|r|r")
        data = scenario.ablation_scenario.read_ablation_table()
        for index, row in enumerate(data):
            tabular.add_row(*row)
            if index == 0:
                tabular.add_hline()
        table_ablation = pl.Table(position="h")
        table_ablation.append(pl.UnsafeCommand("centering"))
        table_ablation.append(tabular)
        table_ablation.add_caption("Ablation table")
        table_ablation.append(pl.UnsafeCommand(r"label{tab:ablationtable}"))
        report.append(table_ablation)


def generate_selection_section(
    report: pl.Document, scenario: SelectionScenario, scenario_output: SelectionOutput
) -> None:
    """Generate a section for a selection scenario."""
    report_dir = Path(report.default_filepath).parent
    time_stamp = time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))
    plot_dir = report_dir / f"{scenario.name.replace(' ', '_')}_plots_{time_stamp}"
    plot_dir.mkdir(exist_ok=True)
    report.append(
        pl.Section(
            f"Selection: {scenario.selector.model_class.__name__} on "
            f"{' '.join([s[0] for s in scenario_output.training_instance_sets])}"
        )
    )
    report.append(
        f"In this scenario, a {scenario.selector.model_class.__name__} "
        f"({scenario.selector.selector_class.__name__}) was trained on the "
        "performance and feature data using ASF-lib. The following solvers "
        f"were run with a cutoff time of {scenario.solver_cutoff} seconds:"
    )
    with report.create(pl.Itemize()) as solver_latex_list:
        for solver_name in scenario_output.solvers.keys():
            solver_name = solver_name.replace("_", " ")
            solver_latex_list.add_item(
                pl.UnsafeCommand(
                    f"textbf{{{solver_name}}} "
                    f"({len(scenario_output.solvers[solver_name])} configurations)"
                )
            )
    # Report training instance sets
    report.append("The following training instance sets were used:")
    with report.create(pl.Itemize()) as instance_set_latex_list:
        for training_set_name, set_size in scenario_output.training_instance_sets:
            training_set_name = training_set_name.replace("_", " ")  # Latex fix
            instance_set_latex_list.add_item(
                pl.UnsafeCommand(f"textbf{{{training_set_name}}} ({set_size} instances)")
            )
    # Report feature extractors
    report.append(
        "The following feature extractors were used with an extractor cutoff "
        f"time of {scenario.extractor_cutoff} seconds:"
    )
    with report.create(pl.Itemize()) as feature_extractor_latex_list:
        for feature_extractor_name in scenario.feature_extractors:
            extractor = resolve_object_name(
                feature_extractor_name,
                gv.file_storage_data_mapping[gv.extractor_nickname_list_path],
                gv.settings().DEFAULT_extractor_dir,
                class_name=Extractor,
            )
            feature_extractor_name = feature_extractor_name.replace("_", " ")  # Latex
            feature_extractor_latex_list.add_item(
                pl.UnsafeCommand(
                    f"textbf{{{feature_extractor_name}}} "
                    f"({extractor.output_dimension} features)"
                )
            )
    # Report training results
    report.append(pl.Subsection("Training Results"))
    # 1. Report VBS and selector performance, create ranking list of the solvers
    # TODO Add ref here to the training sets section?
    report.append(
        f"In this section, the {scenario.objective.name} results of the "
        "portfolio selector on the training instance set(s) listed above "
        "are reported. "
    )
    report.append(
        f"The {scenario.objective.name} value of the Virtual Best Solver "
        "(VBS), i.e., the perfect portfolio selector, is "
    )
    report.append(pl.utils.bold(f"{round(scenario_output.vbs_performance, MAX_DEC)}"))
    report.append(", the actual portfolio selector performance is ")
    report.append(
        pl.utils.bold(f"{round(scenario_output.actual_performance, MAX_DEC)}.\n")
    )

    report.append(
        f"Below, the solvers are ranked based on {scenario.objective.name} performance:"
    )
    with report.create(pl.Enumerate()) as ranking_list:
        for solver_name, conf_id, value in scenario_output.solver_performance_ranking:
            value = round(value, MAX_DEC)
            solver_name = solver_name.replace("_", " ")  # Latex fix
            conf_id = conf_id.replace("_", " ")  # Latex fix
            ranking_list.add_item(
                pl.UnsafeCommand(f"textbf{{{solver_name}}} ({conf_id}): {value}")
            )

    # 2. Marginal contribution ranking list VBS
    report.append(pl.Subsubsection("Marginal Contribution Ranking List"))
    report.append(
        "The following list shows the marginal contribution ranking list for the VBS:"
    )
    with report.create(pl.Enumerate()) as ranking_list:
        for (
            solver_name,
            conf_id,
            contribution,
            performance,
        ) in scenario_output.marginal_contribution_perfect:
            contribution, performance = (
                round(contribution, MAX_DEC),
                round(performance, MAX_DEC),
            )
            solver_name = solver_name.replace("_", " ")  # Latex fix
            conf_id = conf_id.replace("_", " ")  # Latex fix
            ranking_list.add_item(
                pl.UnsafeCommand(
                    f"textbf{{{solver_name}}} ({conf_id}): {contribution} ({performance})"
                )
            )

    # 3. Marginal contribution ranking list actual selector
    report.append(
        "The following list shows the marginal contribution ranking list for "
        "the actual portfolio selector:"
    )
    with report.create(pl.Enumerate()) as ranking_list:
        for (
            solver_name,
            conf_id,
            contribution,
            performance,
        ) in scenario_output.marginal_contribution_actual:
            contribution, performance = (
                round(contribution, MAX_DEC),
                round(performance, MAX_DEC),
            )
            solver_name = solver_name.replace("_", " ")  # Latex fix
            conf_id = conf_id.replace("_", " ")  # Latex fix
            ranking_list.add_item(
                pl.UnsafeCommand(
                    f"textbf{{{solver_name}}} ({conf_id}): {contribution} ({performance})"
                )
            )
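
    # Each ranking entry is a (solver, configuration id, marginal contribution,
    # performance) tuple; roughly, the marginal contribution expresses how much
    # the portfolio's performance degrades when that solver is left out.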

    # 4. Create scatter plot analysis
    report.append(pl.Subsubsection("Scatter Plot Analysis"))
    report.append(latex.AutoRef(f"fig:sbsvsselector{time_stamp}"))
    report.append(pl.utils.bold(" "))  # Trick to force a white space
    report.append(
        "shows the empirical comparison between the portfolio "
        "selector and the single best solver (SBS). "
    )
    report.append(latex.AutoRef("fig:vbsvsselector"))
    report.append(pl.utils.bold(" "))  # Trick to force a white space
    report.append(
        "shows the empirical comparison between the actual portfolio selector "
        "and the virtual best solver (VBS)."
    )
    # Create figure on SBS versus the selector
    sbs_name, sbs_config, _ = scenario_output.solver_performance_ranking[0]
    # sbs_plot_name = f"{Path(sbs_name).name} ({sbs_config})"
    sbs_performance = scenario_output.sbs_performance
    selector_performance = scenario_output.actual_performance_data

    # Join the data together
    df = pd.DataFrame(
        [sbs_performance, selector_performance],
        index=[f"{Path(sbs_name).name} ({sbs_config})", "Selector"],
        dtype=float,
    ).T
    plot = latex.comparison_plot(df, "Single Best Solver vs Selector")
    plot_path = (
        plot_dir / f"{Path(sbs_name).name}_{sbs_config}_vs_"
        f"Selector_{scenario.selector.model_class.__name__}.pdf"
    )
    plot.write_image(plot_path, width=500, height=500)
    with report.create(pl.Figure()) as figure:
        figure.add_image(
            str(plot_path.relative_to(report_dir)),
            width=pl.utils.NoEscape(r"0.6\textwidth"),
        )
        figure.add_caption(
            "Empirical comparison between the Single Best Solver and the Selector"
        )
        label = r"label{fig:sbsvsselector" + str(time_stamp) + r"}"
        figure.append(pl.UnsafeCommand(label))

    # Comparison between the actual portfolio selector in Sparkle and the VBS
    vbs_performance = scenario_output.vbs_performance_data.tolist()
    df = pd.DataFrame(
        [vbs_performance, selector_performance],
        index=["Virtual Best Solver", "Selector"],
        dtype=float,
    ).T
    plot = latex.comparison_plot(df, "Virtual Best Solver vs Selector")
    plot_path = (
        plot_dir
        / f"Virtual_Best_Solver_vs_Selector_{scenario.selector.model_class.__name__}.pdf"
    )
    plot.write_image(plot_path, width=500, height=500)
    with report.create(pl.Figure()) as figure:
        figure.add_image(
            str(plot_path.relative_to(report_dir)),
            width=pl.utils.NoEscape(r"0.6\textwidth"),
        )
        figure.add_caption(
            "Empirical comparison between the Virtual Best Solver and the Selector"
        )
        figure.append(pl.UnsafeCommand(r"label{fig:vbsvsselector}"))

    if scenario_output.test_sets:
        report.append(pl.Subsection("Test Results"))
        report.append("The following results are reported on the test set(s):")
        with report.create(pl.Itemize()) as latex_list:
            for test_set_name, test_set_size in scenario_output.test_sets:
                result = round(
                    scenario_output.test_set_performance[test_set_name], MAX_DEC
                )
                latex_list.add_item(
                    pl.UnsafeCommand(
                        f"textbf{{{test_set_name}}} ({test_set_size} instances): {result}"
                    )
                )


def generate_parallel_portfolio_section(
    report: pl.Document, scenario: PerformanceDataFrame
) -> None:
    """Generate a section for a parallel portfolio scenario."""
    report_dir = Path(report.default_filepath).parent
    portfolio_name = scenario.csv_filepath.parent.name
    time_stamp = time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))
    plot_dir = report_dir / f"{portfolio_name.replace(' ', '_')}_plots_{time_stamp}"
    plot_dir.mkdir()
    report.append(pl.Section(f"Parallel Portfolio {portfolio_name}"))
    report.append(
        "In this scenario, Sparkle runs the portfolio of Solvers on each instance in "
        "parallel with "
        f"{gv.settings().parallel_portfolio_num_seeds_per_solver} different "
        "seeds. The cutoff time for each solver run is set to "
        f"{gv.settings().solver_cutoff_time} seconds."
    )
    report.append(pl.Subsection("Solvers & Instance Sets"))
    report.append("The following Solvers were used in the portfolio:")
    # 1. Report on the Solvers and Instance Sets used for the portfolio
    with report.create(pl.Itemize()) as solver_latex_list:
        configs = scenario.configurations
        for solver in scenario.solvers:
            solver_name = solver.replace("_", " ")
            solver_latex_list.add_item(
                pl.UnsafeCommand(
                    f"textbf{{{solver_name}}} ({len(configs[solver])} configurations)"
                )
            )
    report.append("The following Instance Sets were used in the portfolio:")
    instance_sets = set(Path(instance).parent.name for instance in scenario.instances)
    instance_set_count = [
        len([i for i in scenario.instances if Path(i).parent.name == s])
        for s in instance_sets
    ]
    with report.create(pl.Itemize()) as instance_set_latex_list:
        for set_name, set_size in zip(instance_sets, instance_set_count):
            set_name = set_name.replace("_", " ")  # Latex fix
            instance_set_latex_list.add_item(
                pl.UnsafeCommand(f"textbf{{{set_name}}} ({set_size} instances)")
            )
    # 2. List which solver was the best on how many instances
    report.append(pl.Subsection("Portfolio Performance"))
    objective = scenario.objectives[0]
    report.append(
        f"The objective for the portfolio is {objective}. The solvers "
        "obtained the following performance over the instances: "
    )
    best_solver_count = {solver: 0 for solver in scenario.solvers}
    for instance in scenario.instances:
        ranking = scenario.get_solver_ranking(objective=objective, instances=[instance])
        best_solver_count[ranking[0][0]] += 1
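
    # Illustrative example: with solvers S1 and S2 and three instances on which
    # S1 ranks first twice and S2 once, best_solver_count == {"S1": 2, "S2": 1}.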

    with report.create(pl.Itemize()) as latex_list:
        for solver, count in best_solver_count.items():
            solver_name = solver.replace("_", " ")
            latex_list.add_item(
                pl.UnsafeCommand(
                    f"textbf{{{solver_name}}} was the best solver on {count} instance(s)."
                )
            )
    # TODO Report how many instances remained unsolved

    # 3. Create a table showing the performance of the portfolio and all solvers,
    # by showing the status counts and the number of times each solver was best
    solver_cancelled_count = {solver: 0 for solver in scenario.solvers}
    solver_timeout_count = {solver: 0 for solver in scenario.solvers}
    status_objective = [
        o for o in scenario.objective_names if o.lower().startswith("status")
    ][0]
    cancelled_status = [
        SolverStatus.UNKNOWN,
        SolverStatus.CRASHED,
        SolverStatus.WRONG,
        SolverStatus.ERROR,
        SolverStatus.KILLED,
    ]
    for solver in scenario.solvers:
        for status in scenario.get_value(solver=solver, objective=status_objective):
            status = SolverStatus(status)
            if status in cancelled_status:
                solver_cancelled_count[solver] += 1
            elif status == SolverStatus.TIMEOUT:
                solver_timeout_count[solver] += 1

    report.append(latex.AutoRef("tab:parallelportfoliotable"))
    report.append(pl.utils.bold(" "))  # Trick to force a white space
    report.append("shows the performance of the portfolio on the instance set(s).")
    tabular = pl.Tabular("r|rrrr")
    tabular.add_row(["Solver", objective, "# Timeouts", "# Cancelled", "# Best"])
    tabular.add_hline()
    solver_performance = {
        solver: round(performance, MAX_DEC)
        for solver, _, performance in scenario.get_solver_ranking(objective=objective)
    }
    for solver in scenario.solvers:
        tabular.add_row(
            solver,
            solver_performance[solver],
            solver_timeout_count[solver],
            solver_cancelled_count[solver],
            best_solver_count[solver],
        )
    tabular.add_hline()
    portfolio_performance = round(
        scenario.best_performance(objective=objective), MAX_DEC
    )
    tabular.add_row(
        portfolio_name,
        portfolio_performance,
        sum(solver_timeout_count.values()),
        sum(solver_cancelled_count.values()),
        sum(best_solver_count.values()),
    )
    table_portfolio = pl.Table(position="h")
    table_portfolio.append(pl.UnsafeCommand("centering"))
    table_portfolio.append(tabular)
    table_portfolio.add_caption("Parallel Portfolio Performance")
    table_portfolio.append(pl.UnsafeCommand(r"label{tab:parallelportfoliotable}"))
    report.append(table_portfolio)

    # 4. Create scatter plot analysis between the portfolio and the single best solver
    sbs_name = scenario.get_solver_ranking(objective=objective)[0][0]
    sbs_instance_performance = scenario.get_value(
        solver=sbs_name, objective=objective.name
    )
    sbs_name = Path(sbs_name).name
    report.append(latex.AutoRef("fig:portfoliovssbs"))
    report.append(pl.utils.bold(" "))  # Trick to force a white space
    report.append(
        "shows the empirical comparison between the portfolio and the single "
        f"best solver (SBS) {sbs_name}."
    )
    portfolio_instance_performance = scenario.best_instance_performance(
        objective=objective.name
    ).tolist()

    df = pd.DataFrame(
        [sbs_instance_performance, portfolio_instance_performance],
        index=[f"SBS ({sbs_name}) Performance", "Portfolio Performance"],
        dtype=float,
    ).T
    plot = latex.comparison_plot(df, None)
    plot_path = plot_dir / f"sbs_{sbs_name}_vs_parallel_portfolio.pdf"
    plot.write_image(plot_path, width=500, height=500)
    with report.create(pl.Figure(position="h")) as figure:
        figure.add_image(
            str(plot_path.relative_to(report_dir)),
            width=pl.utils.NoEscape(r"0.6\textwidth"),
        )
        figure.add_caption(f"Portfolio vs SBS Performance ({objective})")
        figure.append(pl.UnsafeCommand(r"label{fig:portfoliovssbs}"))


def append_dataframe_longtable(
    report: pl.Document,
    df: pd.DataFrame,
    caption: str,
    label: str,
    max_cols: int = MAX_COLS_PER_TABLE,
    wide_threshold: int = WIDE_TABLE_THRESHOLD,
    num_keys: int = NUM_KEYS_PDF,
) -> None:
    """Appends a pandas DataFrame to a PyLaTeX document as one or more LaTeX longtables.

    Args:
        report: The PyLaTeX document to which the table(s) will be appended.
        df: The DataFrame to be rendered as LaTeX longtable(s).
        caption: The caption for the table(s).
        label: The LaTeX label for referencing the table(s).
        max_cols: Maximum number of value columns per table chunk.
            Defaults to MAX_COLS_PER_TABLE.
        wide_threshold: Number of columns above which the table is rotated
            to landscape. Defaults to WIDE_TABLE_THRESHOLD.
        num_keys: Number of key columns to include in each table chunk.
            Defaults to NUM_KEYS_PDF.

    Returns:
        None
    """
    import math
    from typing import Union

    def latex_escape_text(s: str) -> str:
        """Escape special LaTeX characters in a string."""
        # Replace the backslash with a placeholder first: substituting
        # \textbackslash{} directly would let the brace replacements below
        # mangle the braces it introduces.
        s = s.replace("\\", "\0")
        s = (
            s.replace("&", r"\&")
            .replace("%", r"\%")
            .replace("$", r"\$")
            .replace("#", r"\#")
            .replace("_", r"\_")
            .replace("{", r"\{")
            .replace("}", r"\}")
            .replace("~", r"\textasciitilde{}")
            .replace("^", r"\textasciicircum{}")
        )
        return s.replace("\0", r"\textbackslash{}")
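
    # e.g. latex_escape_text("50%_done") == r"50\%\_done"; a lone backslash
    # becomes r"\textbackslash{}" with its braces preserved, not re-escaped.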

    def last_path_segment(text: str) -> str:
        """Keep only the last non-empty path-like segment.

        Handles both backslashes and forward slashes and ignores any
        leading/trailing slashes.
        """
        t = str(text).strip().replace("\\", "/")
        parts = [p for p in t.split("/") if p]  # ignore empty segments
        return parts[-1] if parts else ""
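
    # e.g. last_path_segment("Solvers/PbO-CCSAT/") == "PbO-CCSAT"
    # and last_path_segment(r"C:\data\instances") == "instances"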

    def wrap_fixed_shortstack(cell: str, width: int = MAX_CELL_LEN) -> str:
        """Wrap long text to a fixed width for LaTeX tables."""
        string_cell = last_path_segment(cell)
        if len(string_cell) <= width:
            return latex_escape_text(string_cell)
        chunks = [
            latex_escape_text(string_cell[index : index + width])
            for index in range(0, len(string_cell), width)
        ]
        # Left-aligned shortstack: forces line breaks and grows the row height
        return r"\shortstack[l]{" + r"\\ ".join(chunks) + "}"
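
    # e.g. wrap_fixed_shortstack("abcdefgh", width=5)
    #   == r"\shortstack[l]{abcde\\ fgh}"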

    def wrap_header_labels(
        df: pd.DataFrame, width_per_cell: int = MAX_CELL_LEN
    ) -> pd.DataFrame:
        """Wrap long header labels to a fixed width for LaTeX tables."""
        df_copy = df.copy()
        if isinstance(df_copy.columns, pd.MultiIndex):
            new_cols = []
            for tup in df_copy.columns:
                new_cols.append(
                    tuple(
                        wrap_fixed_shortstack(last_path_segment(level), width_per_cell)
                        if isinstance(level, str)
                        else level
                        for level in tup
                    )
                )
            names = [
                (
                    wrap_fixed_shortstack(last_path_segment(name), width_per_cell)
                    if isinstance(name, str)
                    else name
                )
                for name in (df_copy.columns.names or [])
            ]
            df_copy.columns = pd.MultiIndex.from_tuples(new_cols, names=names)
        else:
            df_copy.columns = [
                wrap_fixed_shortstack(last_path_segment(column), width_per_cell)
                if isinstance(column, str)
                else column
                for column in df_copy.columns
            ]
        return df_copy

    def format_cell(cell: Union[int, float, str]) -> str:
        """Format a cell for printing in a LaTeX table."""
        try:
            float_cell = float(cell)
        except (TypeError, ValueError):
            return wrap_fixed_shortstack(last_path_segment(str(cell)), MAX_CELL_LEN)

        if not math.isfinite(float_cell):
            return "NaN"

        if float_cell.is_integer():
            return str(int(float_cell))
        # Round to MAX_DEC, then strip trailing zeros
        s = f"{round(float_cell, MAX_DEC):.{MAX_DEC}f}".rstrip("0").rstrip(".")
        return s
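
    # e.g. format_cell(3.14159265) == "3.1416", format_cell(2.0) == "2",
    # format_cell(float("inf")) == "NaN"; non-numeric strings are wrapped.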

    df_copy = df.copy()

    # In order to be able to show the key columns, we need to reset the index
    if not isinstance(df_copy.index, pd.RangeIndex) and df_copy.index.name in (
        None,
        "index",
        "",
    ):
        df_copy = df_copy.reset_index()

    # Remove the Seed column from the performance dataframe since it is not
    # very informative and clutters the table
    if isinstance(df, PerformanceDataFrame):
        mask = df_copy.columns.get_level_values("Meta") == "Seed"
        df_copy = df_copy.loc[:, ~mask]

    # For the performance dataframe, we want to show the objective values
    # together with their corresponding instance and run. Since objective,
    # instance and run are indexes of the performance dataframe, the
    # reset_index above turns them into regular columns. We call them key
    # columns, since they identify the value of an objective for a given
    # instance and run. (Respectively FeatureGroup, FeatureName and Extractor
    # in the feature dataframe.)
    keys = df_copy.iloc[:, :num_keys]  # Key columns

    # Split the dataframe into chunks of max_cols value columns per page
    number_column_chunks = max((df_copy.shape[1] - 1) // max_cols + 1, 1)
    for i in range(number_column_chunks):
        report.append(NewPage())
        start_col = i * max_cols
        end_col = (i + 1) * max_cols

        # Select the value columns for this chunk
        values = df_copy.iloc[
            :,
            start_col + num_keys : end_col + num_keys,
        ]

        # Concatenate the key and value columns
        full_part = pd.concat([keys, values], axis=1)

        # If there are no value columns left, we are done
        if full_part.shape[1] <= num_keys:
            break

        full_part_wrapped = wrap_header_labels(full_part, MAX_CELL_LEN)

        # Tell pandas how to print numbers
        formatters = {col: format_cell for col in full_part_wrapped.columns}

        tex = full_part_wrapped.to_latex(
            longtable=True,
            index=False,
            escape=False,  # We want to split the long words, not escape them
            caption=caption + (f" (part {i + 1})" if number_column_chunks > 1 else ""),
            label=label + f"-p{i + 1}" if number_column_chunks > 1 else label,
            float_format=None,
            multicolumn=True,
            multicolumn_format="c",
            multirow=False,
            column_format="c" * full_part_wrapped.shape[1],
            formatters=formatters,
        )

        # Centre the whole table horizontally
        centred_tex = "\\begin{center}\n" + tex + "\\end{center}\n"

        # Rotate if still too wide
        if full_part_wrapped.shape[1] > wide_threshold:
            report.append(NoEscape(r"\begin{landscape}"))
            report.append(NoEscape(centred_tex))
            report.append(NoEscape(r"\end{landscape}"))
        else:
            report.append(NoEscape(centred_tex))
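

# Minimal usage sketch for append_dataframe_longtable (hypothetical data; the
# real callers pass a PerformanceDataFrame / FeatureDataFrame, see
# generate_appendix below):
#   doc = pl.Document()
#   data = pd.DataFrame({"Objective": ["runtime"], "Instance": ["i1"],
#                        "Run": [1], "SolverA": [3.5], "SolverB": [7.0]})
#   append_dataframe_longtable(doc, data, caption="Example", label="tab:example")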


def generate_appendix(
    report: pl.Document,
    performance_data: PerformanceDataFrame,
    feature_data: FeatureDataFrame,
) -> None:
    """Add an appendix with the full performance and feature data to the report.

    Args:
        report: The LaTeX document object to which the appendix will be added.
        performance_data: The performance data to be included in the appendix.
        feature_data: The feature data to be included in the appendix.

    Returns:
        None
    """
    # Preamble packages needed for the longtables
    for pkg in ("longtable", "pdflscape", "caption", "booktabs", "placeins"):
        p = pl.Package(pkg)
        if p not in report.packages:
            report.packages.append(p)

    report.append(pl.NewPage())
    report.append(pl.NoEscape(r"\clearpage"))
    report.append(pl.NoEscape(r"\FloatBarrier"))
    report.append(pl.UnsafeCommand("appendix"))
    report.append(pl.Section("Performance DataFrame"))

    append_dataframe_longtable(
        report,
        performance_data,
        caption="Performance DataFrame",
        label="tab:perf_data",
        max_cols=MAX_COLS_PER_TABLE,
        wide_threshold=WIDE_TABLE_THRESHOLD,
        num_keys=NUM_KEYS_PDF,
    )

    report.append(pl.Section("Feature DataFrame"))
    append_dataframe_longtable(
        report,
        feature_data,
        caption="Feature DataFrame",
        label="tab:feature_data",
        max_cols=MAX_COLS_PER_TABLE,
        wide_threshold=WIDE_TABLE_THRESHOLD,
        num_keys=NUM_KEYS_FDF,
    )

    report.append(pl.NoEscape(r"\FloatBarrier"))


def main(argv: list[str]) -> None:
    """Generate a report for executed experiments in the platform."""
    # Log command call
    sl.log_command(sys.argv, gv.settings().random_state)

    # Define command line arguments
    parser = parser_function()

    # Process command line arguments
    args = parser.parse_args(argv)

    performance_data = PerformanceDataFrame(gv.settings().DEFAULT_performance_data_path)
    feature_data = FeatureDataFrame(gv.settings().DEFAULT_feature_data_path)

    # Fetch all known scenarios
    configuration_scenarios = gv.configuration_scenarios(refresh=True)
    selection_scenarios = gv.selection_scenarios(refresh=True)
    parallel_portfolio_scenarios = gv.parallel_portfolio_scenarios()

    # Filter scenarios based on args
    if args.solvers:
        solvers = [
            resolve_object_name(
                s, gv.solver_nickname_mapping, gv.settings().DEFAULT_solver_dir, Solver
            )
            for s in args.solvers
        ]
        solver_directories = [solver.directory for solver in solvers]
        solver_directory_names = [str(d) for d in solver_directories]
        configuration_scenarios = [
            s
            for s in configuration_scenarios
            if s.solver.directory in solver_directories
        ]
        selection_scenarios = [
            s
            for s in selection_scenarios
            if set(s.solvers).intersection(solver_directory_names)
        ]
        parallel_portfolio_scenarios = [
            s
            for s in parallel_portfolio_scenarios
            if set(s.solvers).intersection(solver_directory_names)
        ]
    if args.instance_sets:
        instance_sets = [
            resolve_object_name(
                s,
                gv.instance_set_nickname_mapping,
                gv.settings().DEFAULT_instance_dir,
                Instance_Set,
            )
            for s in args.instance_sets
        ]
        instance_set_directories = [i.directory for i in instance_sets]
        instance_set_names = [str(i.name) for i in instance_sets]
        configuration_scenarios = [
            s
            for s in configuration_scenarios
            if s.instance_set.directory in instance_set_directories
        ]
        selection_scenarios = [
            s
            for s in selection_scenarios
            if set(s.instance_sets).intersection(instance_set_names)
        ]
        parallel_portfolio_scenarios = [
            s
            for s in parallel_portfolio_scenarios
            if set(s.instance_sets).intersection(instance_set_names)
        ]

    processed_configuration_scenarios = []
    processed_selection_scenarios = []
    possible_test_sets = [
        Instance_Set(p) for p in gv.settings().DEFAULT_instance_dir.iterdir()
    ]
    for configuration_scenario in configuration_scenarios:
        processed_configuration_scenarios.append(
            (
                ConfigurationOutput(
                    configuration_scenario, performance_data, possible_test_sets
                ),
                configuration_scenario,
            )
        )
    for selection_scenario in selection_scenarios:
        processed_selection_scenarios.append(
            (SelectionOutput(selection_scenario), selection_scenario)
        )

    raw_output = gv.settings().DEFAULT_output_analysis / "JSON"
    if raw_output.exists():  # Clean
        shutil.rmtree(raw_output)
    raw_output.mkdir()

    # Write JSON
    output_json = {}
    for output, configuration_scenario in processed_configuration_scenarios:
        output_json[configuration_scenario.name] = output.serialise()
    for output, selection_scenario in processed_selection_scenarios:
        output_json[selection_scenario.name] = output.serialise()
    # TODO: We do not have an output object for parallel portfolios

    raw_output_json = raw_output / "output.json"
    with raw_output_json.open("w") as f:
        json.dump(output_json, f, indent=4)
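
    # The resulting JSON maps each scenario name to its serialised output,
    # roughly {"<scenario name>": {...}}; see ConfigurationOutput.serialise
    # and SelectionOutput.serialise for the exact fields.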

    print(f"Machine readable output written to: {raw_output_json}")

    if args.only_json:  # Done
        sys.exit(0)

    # TODO: Group scenarios based on:
    # - Configuration / Selection / Parallel Portfolio
    # - Training Instance Set / Testing Instance Set
    # - Configurators can be merged as long as we can match their budgets clearly
    report_directory = gv.settings().DEFAULT_output_analysis / "report"
    if report_directory.exists():  # Clean it
        shutil.rmtree(report_directory)
    report_directory.mkdir()
    target_path = report_directory / "report"
    report = pl.document.Document(
        default_filepath=str(target_path), document_options=["british"]
    )
    bibpath = gv.settings().bibliography_path
    newbibpath = report_directory / "report.bib"
    shutil.copy(bibpath, newbibpath)
    # BUGFIX for unknown package load in PyLatex
    p = pl.package.Package("lastpage")
    if p in report.packages:
        report.packages.remove(p)
    report.packages.append(
        pl.package.Package(
            "geometry",
            options=[
                "verbose",
                "tmargin=3.5cm",
                "bmargin=3.5cm",
                "lmargin=3cm",
                "rmargin=3cm",
            ],
        )
    )
    # Unsafe command needed for \emph{Sparkle}
    report.preamble.extend(
        [
            pl.UnsafeCommand("title", r"\emph{Sparkle} Algorithm Portfolio report"),
            pl.UnsafeCommand(
                "author",
                r"Generated by \emph{Sparkle} "
                f"(version: {__sparkle_version__})",
            ),
        ]
    )
    report.append(pl.Command("maketitle"))
    report.append(pl.Section("Introduction"))
    # TODO: Add a quick overview to the introduction of what the report covers
    # regarding Solvers, Instance Sets and Feature Extractors
    report.append(
        pl.UnsafeCommand(
            r"emph{Sparkle}~\cite{Hoos15} is a multi-agent problem-solving platform "
            r"based on Programming by Optimisation (PbO)~\cite{Hoos12}. It provides a "
            "number of effective algorithm optimisation techniques (such as automated "
            "algorithm configuration and portfolio-based algorithm selection) to "
            "accelerate existing solvers."
        )
    )

    for scenario_output, scenario in processed_configuration_scenarios:
        generate_configuration_section(report, scenario, scenario_output)

    for scenario_output, scenario in processed_selection_scenarios:
        generate_selection_section(report, scenario, scenario_output)

    for parallel_dataframe in parallel_portfolio_scenarios:
        generate_parallel_portfolio_section(report, parallel_dataframe)

    # Check whether the user wants to add the appendices
    settings = gv.settings(args)
    if settings.appendices:
        generate_appendix(report, performance_data, feature_data)

    # Add the bibliography
    report.append(pl.NewPage())  # Ensure it starts on a new page
    report.append(pl.Command("bibliographystyle", arguments=["plain"]))
    report.append(pl.Command("bibliography", arguments=[str(newbibpath)]))
    # Generate the report .tex and .pdf
    report.generate_pdf(target_path, clean=False, clean_tex=False, compiler="pdflatex")
    # TODO: This should be done by PyLatex. Generate the bib and regenerate the report
    # Reference for the (terrible) solution: https://tex.stackexchange.com/
    # questions/63852/question-mark-or-bold-citation-key-instead-of-citation-number
    import subprocess

    # Run BibTeX silently
    subprocess.run(
        ["bibtex", newbibpath.with_suffix("")],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    # Two more pdflatex passes so the citations and cross-references resolve
    report.generate_pdf(target_path, clean=False, clean_tex=False, compiler="pdflatex")
    report.generate_pdf(target_path, clean=False, clean_tex=False, compiler="pdflatex")
    print(f"Report generated at {target_path}.pdf")
    sys.exit(0)


if __name__ == "__main__":
    main(sys.argv[1:])