#!/usr/bin/env python3
"""Sparkle command to generate a report for an executed experiment."""

import sys
import shutil
import argparse
from pathlib import Path
import time
import json
import pandas as pd

from pylatex import NoEscape, NewPage
import pylatex as pl
from sparkle import __version__ as __sparkle_version__

from sparkle.CLI.help import global_variables as gv
from sparkle.CLI.help import resolve_object_name
from sparkle.CLI.help import logging as sl
from sparkle.CLI.help import argparse_custom as ac

from sparkle.solver import Solver
from sparkle.instance import Instance_Set
from sparkle.selector import Extractor
from sparkle.structures import PerformanceDataFrame, FeatureDataFrame
from sparkle.configurator.configurator import ConfigurationScenario
from sparkle.selector.selector import SelectionScenario
from sparkle.types import SolverStatus
from sparkle.platform import Settings

from sparkle.platform import latex
from sparkle.platform.output.configuration_output import ConfigurationOutput
from sparkle.platform.output.selection_output import SelectionOutput


MAX_DEC = 4  # Maximum number of decimals used for each reported value
MAX_COLS_PER_TABLE = 2  # Number of value columns shown in addition to the key columns
WIDE_TABLE_THRESHOLD = 4  # Number of columns above which we switch to landscape
NUM_KEYS_PDF = 3  # Number of key columns in the performance DataFrame
NUM_KEYS_FDF = 3  # Number of key columns in the feature DataFrame
MAX_CELL_LEN = 17  # Maximum characters per table cell before wrapping
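# Note: with these defaults, a full table chunk produced by
# append_dataframe_longtable holds NUM_KEYS_PDF + MAX_COLS_PER_TABLE = 5
# columns, which exceeds WIDE_TABLE_THRESHOLD, so full-width appendix
# chunks end up rotated to landscape.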


def parser_function() -> argparse.ArgumentParser:
    """Define the command line arguments."""
    parser = argparse.ArgumentParser(
        description="Generates a report for all known selection, configuration and "
        "parallel portfolio scenarios.",
        epilog="If you wish to filter specific solvers, instance sets, ... have a look "
        "at the command line arguments.",
    )
    # Add argument for filtering solvers
    parser.add_argument(
        *ac.SolversReportArgument.names, **ac.SolversReportArgument.kwargs
    )
    # Add argument for filtering instance sets
    parser.add_argument(
        *ac.InstanceSetsReportArgument.names, **ac.InstanceSetsReportArgument.kwargs
    )

    # Add argument for toggling the appendices
    parser.add_argument(
        *Settings.OPTION_appendices.args, **Settings.OPTION_appendices.kwargs
    )

    # Add argument for filtering configurators?
    # Add argument for filtering selectors?
    # Add argument for filtering ??? scenario ids? configuration ids?
    parser.add_argument(*ac.GenerateJSONArgument.names, **ac.GenerateJSONArgument.kwargs)
    return parser


def generate_configuration_section(
    report: pl.Document,
    scenario: ConfigurationScenario,
    scenario_output: ConfigurationOutput,
) -> None:
    """Generate a section for a configuration scenario."""
    report_dir = Path(report.default_filepath).parent
    time_stamp = time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))
    plot_dir = (
        report_dir
        / f"{scenario.configurator.__name__}_{scenario.name}_plots_{time_stamp}"
    )
    plot_dir.mkdir(exist_ok=True)

    # 1. Write section intro
    report.append(
        pl.Section(
            f"{scenario.configurator.__name__} Configuration: "
            f"{scenario.solver.name} on {scenario.instance_set.name}"
        )
    )
    report.append("In this scenario, ")
    report.append(
        pl.UnsafeCommand(
            f"textbf{{{scenario.configurator.__name__}}} "
            f"({scenario.configurator.full_name})~\\cite"
            f"{{{scenario.configurator.__name__}}} with version "
            f"{scenario.configurator.version} was used for configuration. "
        )
    )
    report.append(
        f"The Solver {scenario.solver} was optimised on the training set "
        f"{scenario.instance_set}. The scenario was run {scenario.number_of_runs} "
        f"times independently with different seeds, yielding {scenario.number_of_runs} "
        f"configurations. The cutoff time for the solver was set to "
        f"{scenario.solver_cutoff_time} seconds. The optimised objective is "
        f"{scenario.sparkle_objectives[0]}. Each configuration was evaluated on the "
        "training set to determine the best configuration, i.e. the one with the best "
        f"{scenario.sparkle_objectives[0]} value on the training set."
    )

    # 2. Report all the configurator settings in table format
    report.append(pl.Subsection("Configurator Settings"))
    report.append(
        f"The following settings were used for {scenario.configurator.__name__}:\n"
    )
    tabular = pl.Tabular("l|r")
    tabular.add_row("Setting", "Value")
    tabular.add_hline()
    for setting, value in scenario.serialise().items():
        # Keep only the last path segment for paths, otherwise the table gets
        # too wide and we cannot see the other values
        t = str(value).strip().replace("\\", "/")
        parts = [p for p in t.split("/") if p]
        if parts:
            tabular.add_row([setting, parts[-1]])
        else:
            tabular.add_row([setting, "None"])
    table_conf_settings = pl.Table(position="h")
    table_conf_settings.append(pl.UnsafeCommand("centering"))
    table_conf_settings.append(tabular)
    table_conf_settings.add_caption("Configurator Settings")
    report.append(table_conf_settings)

    # 3. Report details on the solver and instance sets used
    report.append(pl.Subsection("Solver & Instance Set(s) Details"))
    cs = scenario_output.solver.get_configuration_space()
    report.append(
        f"The solver {scenario_output.solver} was configured using "
        f"{len(cs.values())} configurable (hyper)parameters. "
        f"The configuration space has {len(cs.conditions)} conditions. "
    )
    report.append("The following instance sets were used for the scenario:")
    with report.create(pl.Itemize()) as instance_set_latex_list:
        for instance_set in [
            scenario_output.instance_set_train
        ] + scenario_output.test_instance_sets:
            training_set_name = instance_set.name.replace("_", " ")  # LaTeX fix
            instance_set_latex_list.add_item(
                pl.UnsafeCommand(
                    f"textbf{{{training_set_name}}} ({instance_set.size} instances)"
                )
            )

    # Nested helper to generate a results summary of default vs best configuration
    # on an instance set
    def instance_set_summary(instance_set_name: str) -> None:
        """Generate a results summary of default vs best on an instance set."""
        instance_set_results = scenario_output.instance_set_results[instance_set_name]
        report.append(
            f"The {scenario.sparkle_objectives[0]} value of the Default "
            f"Configuration on {instance_set_name} was "
        )
        report.append(
            pl.UnsafeCommand(
                f"textbf{{{round(instance_set_results.default_performance, MAX_DEC)}}}.\n"
            )
        )
        report.append(
            f"The {scenario.sparkle_objectives[0]} value of the Best "
            f"Configuration on {instance_set_name} was "
        )
        report.append(
            pl.UnsafeCommand(
                f"textbf{{{round(instance_set_results.best_performance, MAX_DEC)}}}.\n"
            )
        )
        report.append("In ")
        report.append(
            latex.AutoRef(f"fig:bestvsdefault{instance_set_name}{time_stamp}")
        )
        report.append(pl.utils.bold(" "))  # Force white space
        report.append("the results are plotted per instance.")

        # Create a graph to compare the best configuration vs the default
        # on the instance set
        df = pd.DataFrame(
            [
                instance_set_results.default_instance_performance,
                instance_set_results.best_instance_performance,
            ],
            index=["Default Configuration", "Best Configuration"],
            dtype=float,
        ).T
        plot = latex.comparison_plot(df, None)
        plot_path = (
            plot_dir / f"{scenario_output.best_configuration_key}_vs_"
            f"Default_{instance_set_name}.pdf"
        )
        plot.write_image(plot_path, width=500, height=500)
        with report.create(pl.Figure(position="h")) as figure:
            figure.add_image(
                str(plot_path.relative_to(report_dir)),
                width=pl.utils.NoEscape(r"0.6\textwidth"),
            )
            figure.add_caption(
                f"Best vs Default Performance on {instance_set_name} "
                f"({scenario.sparkle_objectives[0]})"
            )
            figure.append(
                pl.UnsafeCommand(
                    r"label{" f"fig:bestvsdefault{instance_set_name}{time_stamp}" r"}"
                )
            )
        if scenario.sparkle_objectives[0].time:  # Write status table
            report.append("The following Solver statuses were found per instance:")
            tabular = pl.Tabular("l|c|c|c")
            tabular.add_row("Status", "Default", "Best", "Overlap")
            tabular.add_hline()
            # Count the statuses
            for status in SolverStatus:
                default_count, best_count, overlap_count = 0, 0, 0
                for instance in instance_set_results.instance_status_default.keys():
                    instance = str(instance)
                    default_hit = (
                        instance_set_results.instance_status_default[instance] == status
                    )
                    best_hit = (
                        instance_set_results.instance_status_best[instance] == status
                    )
                    default_count += default_hit
                    best_count += best_hit
                    overlap_count += default_hit and best_hit
                if default_count or best_count:
                    tabular.add_row(status, default_count, best_count, overlap_count)
            table_status_values = pl.Table(position="h")
            table_status_values.append(pl.UnsafeCommand("centering"))
            table_status_values.append(tabular)
            table_status_values.add_caption(
                "Status count for the best and default configuration."
            )
            report.append(table_status_values)

    # 4. Report the results of the best configuration on the training set vs the default
    report.append(
        pl.Subsection(
            f"Comparison of Default and Best Configuration on Training Set "
            f"{scenario_output.instance_set_train.name}"
        )
    )
    instance_set_summary(scenario_output.instance_set_train.name)

    # 5. Report the actual configuration values
    report.append(pl.Subsubsection("Best Configuration Values"))
    if (
        scenario_output.best_configuration_key
        == PerformanceDataFrame.default_configuration
    ):
        report.append(
            "The configurator failed to find a better configuration than the "
            "default configuration on the training set in this scenario."
        )
    else:
        report.append(
            "The following parameter values "
            "were found to be the best on the training set:\n"
        )
        tabular = pl.Tabular("l|r")
        tabular.add_row("Parameter", "Value")
        tabular.add_hline()
        for parameter, value in scenario_output.best_configuration.items():
            tabular.add_row([parameter, str(value)])
        table_best_values = pl.Table(position="h")
        table_best_values.append(pl.UnsafeCommand("centering"))
        table_best_values.append(tabular)
        table_best_values.add_caption("Best found configuration values")
        report.append(table_best_values)

    # 6. Report the results of the best vs the default configuration on the test sets
    for test_set in scenario_output.test_instance_sets:
        report.append(
            pl.Subsection(
                f"Comparison of Default and Best Configuration on Test Set "
                f"{test_set.name}"
            )
        )
        instance_set_summary(test_set.name)

    # 7. Report the parameter ablation scenario if present
    if scenario.ablation_scenario:
        report.append(pl.Subsection("Parameter importance via Ablation"))
        report.append("Ablation analysis ")
        report.append(pl.UnsafeCommand(r"cite{FawcettHoos16} "))
        test_set = scenario.ablation_scenario.test_set
        if not scenario.ablation_scenario.test_set:
            test_set = scenario.ablation_scenario.train_set
        report.append(
            f"is performed from the default configuration of {scenario.solver} to the "
            f"best found configuration ({scenario_output.best_configuration_key}) "
            "to see which parameter changes between them contribute most to the "
            "improved performance. The ablation path uses the training set "
            f"{scenario.ablation_scenario.train_set.name} and validation is performed "
            f"on the test set {test_set.name}. The set of parameters that differ "
            "between the two configurations forms the ablation path. Starting from "
            "the default configuration, the path is computed in a sequence of rounds. "
            "In each round, every remaining parameter is flipped in the configuration "
            "and validated on its performance. The flipped parameter with the best "
            "performance in that round is added to the configuration, and the next "
            "round starts with the remaining parameters. This repeats until all "
            "parameters are flipped, which yields the best found configuration. "
            "The analysis resulted in the ablation presented in "
        )
        report.append(latex.AutoRef("tab:ablationtable"))
        report.append(".")

        # Add the ablation table
        tabular = pl.Tabular("r|l|r|r|r")
        data = scenario.ablation_scenario.read_ablation_table()
        for index, row in enumerate(data):
            tabular.add_row(*row)
            if index == 0:
                tabular.add_hline()
        table_ablation = pl.Table(position="h")
        table_ablation.append(pl.UnsafeCommand("centering"))
        table_ablation.append(tabular)
        table_ablation.add_caption("Ablation table")
        table_ablation.append(pl.UnsafeCommand(r"label{tab:ablationtable}"))
        report.append(table_ablation)


def generate_selection_section(
    report: pl.Document, scenario: SelectionScenario, scenario_output: SelectionOutput
) -> None:
    """Generate a section for a selection scenario."""
    report_dir = Path(report.default_filepath).parent
    time_stamp = time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))
    plot_dir = report_dir / f"{scenario.name.replace(' ', '_')}_plots_{time_stamp}"
    plot_dir.mkdir(exist_ok=True)
    report.append(
        pl.Section(
            f"Selection: {scenario.selector.model_class.__name__} on "
            f"{' '.join([s[0] for s in scenario_output.training_instance_sets])}"
        )
    )
    report.append(
        f"In this scenario, a {scenario.selector.model_class.__name__} "
        f"({scenario.selector.selector_class.__name__}) was trained on the "
        "performance and feature data using ASF-lib. The following solvers "
        f"were run with a cutoff time of {scenario.solver_cutoff} seconds:"
    )
    with report.create(pl.Itemize()) as solver_latex_list:
        for solver_name in scenario_output.solvers.keys():
            solver_name = solver_name.replace("_", " ")
            solver_latex_list.add_item(
                pl.UnsafeCommand(
                    f"textbf{{{solver_name}}} "
                    f"({len(scenario_output.solvers[solver_name])} configurations)"
                )
            )
    # Report training instance sets
    report.append("The following training instance sets were used:")
    with report.create(pl.Itemize()) as instance_set_latex_list:
        for training_set_name, set_size in scenario_output.training_instance_sets:
            training_set_name = training_set_name.replace("_", " ")  # LaTeX fix
            instance_set_latex_list.add_item(
                pl.UnsafeCommand(f"textbf{{{training_set_name}}} ({set_size} instances)")
            )
    # Report feature extractors
    report.append(
        "The following feature extractors were used with an extractor cutoff "
        f"time of {scenario.extractor_cutoff} seconds:"
    )
    with report.create(pl.Itemize()) as feature_extractor_latex_list:
        for feature_extractor_name in scenario.feature_extractors:
            extractor = resolve_object_name(
                feature_extractor_name,
                gv.file_storage_data_mapping[gv.extractor_nickname_list_path],
                gv.settings().DEFAULT_extractor_dir,
                class_name=Extractor,
            )
            feature_extractor_name = feature_extractor_name.replace("_", " ")  # LaTeX
            feature_extractor_latex_list.add_item(
                pl.UnsafeCommand(
                    f"textbf{{{feature_extractor_name}}} "
                    f"({extractor.output_dimension} features)"
                )
            )
    # Report training results
    report.append(pl.Subsection("Training Results"))
    # 1. Report VBS and selector performance, create ranking list of the solvers
    # TODO Add ref here to the training sets section?
    report.append(
        f"In this section, the {scenario.objective.name} results of the "
        "portfolio selector on the training instance set(s) listed above "
        "are reported. "
    )
    report.append(
        f"The {scenario.objective.name} value for the Virtual Best Solver "
        "(VBS), i.e., the perfect portfolio selector, is "
    )
    report.append(pl.utils.bold(f"{round(scenario_output.vbs_performance, MAX_DEC)}"))
    report.append(", the actual portfolio selector performance is ")
    report.append(
        pl.utils.bold(f"{round(scenario_output.actual_performance, MAX_DEC)}.\n")
    )

    report.append(
        f"Below, the solvers are ranked based on {scenario.objective.name} performance:"
    )
    with report.create(pl.Enumerate()) as ranking_list:
        for solver_name, conf_id, value in scenario_output.solver_performance_ranking:
            value = round(value, MAX_DEC)
            solver_name = solver_name.replace("_", " ")  # LaTeX fix
            conf_id = conf_id.replace("_", " ")  # LaTeX fix
            ranking_list.add_item(
                pl.UnsafeCommand(f"textbf{{{solver_name}}} ({conf_id}): {value}")
            )

    # 2. Marginal contribution ranking list VBS
    report.append(pl.Subsubsection("Marginal Contribution Ranking List"))
    report.append(
        "The following list shows the marginal contribution ranking list for the VBS:"
    )
    with report.create(pl.Enumerate()) as ranking_list:
        for (
            solver_name,
            conf_id,
            contribution,
            performance,
        ) in scenario_output.marginal_contribution_perfect:
            contribution, performance = (
                round(contribution, MAX_DEC),
                round(performance, MAX_DEC),
            )
            solver_name = solver_name.replace("_", " ")  # LaTeX fix
            conf_id = conf_id.replace("_", " ")  # LaTeX fix
            ranking_list.add_item(
                pl.UnsafeCommand(
                    f"textbf{{{solver_name}}} ({conf_id}): {contribution} ({performance})"
                )
            )

    # 3. Marginal contribution ranking list actual selector
    report.append(
        "The following list shows the marginal contribution ranking list for "
        "the actual portfolio selector:"
    )
    with report.create(pl.Enumerate()) as ranking_list:
        for (
            solver_name,
            conf_id,
            contribution,
            performance,
        ) in scenario_output.marginal_contribution_actual:
            contribution, performance = (
                round(contribution, MAX_DEC),
                round(performance, MAX_DEC),
            )
            solver_name = solver_name.replace("_", " ")  # LaTeX fix
            conf_id = conf_id.replace("_", " ")  # LaTeX fix
            ranking_list.add_item(
                pl.UnsafeCommand(
                    f"textbf{{{solver_name}}} ({conf_id}): {contribution} ({performance})"
                )
            )

    # 4. Create scatter plot analysis
    report.append(pl.Subsubsection("Scatter Plot Analysis"))
    report.append(latex.AutoRef(f"fig:sbsvsselector{time_stamp}"))
    report.append(pl.utils.bold(" "))  # Trick to force a white space
    report.append(
        "shows the empirical comparison between the portfolio "
        "selector and the single best solver (SBS). "
    )
    report.append(latex.AutoRef("fig:vbsvsselector"))
    report.append(pl.utils.bold(" "))  # Trick to force a white space
    report.append(
        "shows the empirical comparison between the actual portfolio selector "
        "and the virtual best solver (VBS)."
    )
    # Create figure on SBS versus the selector
    sbs_name, sbs_config, _ = scenario_output.solver_performance_ranking[0]
    sbs_performance = scenario_output.sbs_performance
    selector_performance = scenario_output.actual_performance_data

    # Join the data together
    df = pd.DataFrame(
        [sbs_performance, selector_performance],
        index=[f"{Path(sbs_name).name} ({sbs_config})", "Selector"],
        dtype=float,
    ).T
    plot = latex.comparison_plot(df, "Single Best Solver vs Selector")
    plot_path = (
        plot_dir / f"{Path(sbs_name).name}_{sbs_config}_vs_"
        f"Selector_{scenario.selector.model_class.__name__}.pdf"
    )
    plot.write_image(plot_path, width=500, height=500)
    with report.create(pl.Figure()) as figure:
        figure.add_image(
            str(plot_path.relative_to(report_dir)),
            width=pl.utils.NoEscape(r"0.6\textwidth"),
        )
        figure.add_caption(
            "Empirical comparison between the Single Best Solver and the Selector"
        )
        figure.append(
            pl.UnsafeCommand(r"label{fig:sbsvsselector" + str(time_stamp) + r"}")
        )

    # Comparison between the actual portfolio selector in Sparkle and the VBS
    vbs_performance = scenario_output.vbs_performance_data.tolist()
    df = pd.DataFrame(
        [vbs_performance, selector_performance],
        index=["Virtual Best Solver", "Selector"],
        dtype=float,
    ).T
    plot = latex.comparison_plot(df, "Virtual Best Solver vs Selector")
    plot_path = (
        plot_dir
        / f"Virtual_Best_Solver_vs_Selector_{scenario.selector.model_class.__name__}.pdf"
    )
    plot.write_image(plot_path, width=500, height=500)
    with report.create(pl.Figure()) as figure:
        figure.add_image(
            str(plot_path.relative_to(report_dir)),
            width=pl.utils.NoEscape(r"0.6\textwidth"),
        )
        figure.add_caption(
            "Empirical comparison between the Virtual Best Solver and the Selector"
        )
        figure.append(pl.UnsafeCommand(r"label{fig:vbsvsselector}"))

    if scenario_output.test_sets:
        report.append(pl.Subsection("Test Results"))
        report.append("The following results are reported on the test set(s):")
        with report.create(pl.Itemize()) as latex_list:
            for test_set_name, test_set_size in scenario_output.test_sets:
                result = round(
                    scenario_output.test_set_performance[test_set_name], MAX_DEC
                )
                latex_list.add_item(
                    pl.UnsafeCommand(
                        f"textbf{{{test_set_name}}} ({test_set_size} instances): {result}"
                    )
                )


def generate_parallel_portfolio_section(
    report: pl.Document, scenario: PerformanceDataFrame
) -> None:
    """Generate a section for a parallel portfolio scenario."""
    report_dir = Path(report.default_filepath).parent
    portfolio_name = scenario.csv_filepath.parent.name
    time_stamp = time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))
    plot_dir = report_dir / f"{portfolio_name.replace(' ', '_')}_plots_{time_stamp}"
    plot_dir.mkdir()
    report.append(pl.Section(f"Parallel Portfolio {portfolio_name}"))
    report.append(
        "In this scenario, Sparkle runs the portfolio of Solvers on each instance in "
        "parallel with "
        f"{gv.settings().parallel_portfolio_num_seeds_per_solver} different "
        "seeds. The cutoff time for each solver run is set to "
        f"{gv.settings().solver_cutoff_time} seconds."
    )
    report.append(pl.Subsection("Solvers & Instance Sets"))
    report.append("The following Solvers were used in the portfolio:")
    # 1. Report on the Solvers and Instance Sets used for the portfolio
    with report.create(pl.Itemize()) as solver_latex_list:
        configs = scenario.configurations
        for solver in scenario.solvers:
            solver_name = solver.replace("_", " ")
            solver_latex_list.add_item(
                pl.UnsafeCommand(
                    f"textbf{{{solver_name}}} ({len(configs[solver])} configurations)"
                )
            )
    report.append("The following Instance Sets were used in the portfolio:")
    instance_sets = set(Path(instance).parent.name for instance in scenario.instances)
    instance_set_count = [
        len([i for i in scenario.instances if Path(i).parent.name == s])
        for s in instance_sets
    ]
    with report.create(pl.Itemize()) as instance_set_latex_list:
        for set_name, set_size in zip(instance_sets, instance_set_count):
            set_name = set_name.replace("_", " ")  # LaTeX fix
            instance_set_latex_list.add_item(
                pl.UnsafeCommand(f"textbf{{{set_name}}} ({set_size} instances)")
            )
    # 2. List which solver was the best on how many instances
    report.append(pl.Subsection("Portfolio Performance"))
    objective = scenario.objectives[0]
    report.append(
        f"The objective for the portfolio is {objective}. The "
        "following performance of the solvers was found over the instances: "
    )
    best_solver_count = {solver: 0 for solver in scenario.solvers}
    for instance in scenario.instances:
        ranking = scenario.get_solver_ranking(objective=objective, instances=[instance])
        best_solver_count[ranking[0][0]] += 1

    with report.create(pl.Itemize()) as latex_list:
        for solver, count in best_solver_count.items():
            solver_name = solver.replace("_", " ")
            latex_list.add_item(
                pl.UnsafeCommand(
                    f"textbf{{{solver_name}}} was the best solver on {count} instance(s)."
                )
            )
    # TODO Report how many instances remained unsolved

    # 3. Create a table showing the performance of the portfolio and all solvers,
    # by showing the status counts and the number of times each solver was best
    solver_cancelled_count = {solver: 0 for solver in scenario.solvers}
    solver_timeout_count = {solver: 0 for solver in scenario.solvers}
    status_objective = [
        o for o in scenario.objective_names if o.lower().startswith("status")
    ][0]
    cancelled_status = [
        SolverStatus.UNKNOWN,
        SolverStatus.CRASHED,
        SolverStatus.WRONG,
        SolverStatus.ERROR,
        SolverStatus.KILLED,
    ]
    for solver in scenario.solvers:
        for status in scenario.get_value(solver=solver, objective=status_objective):
            status = SolverStatus(status)
            if status in cancelled_status:
                solver_cancelled_count[solver] += 1
            elif status == SolverStatus.TIMEOUT:
                solver_timeout_count[solver] += 1

    report.append(latex.AutoRef("tab:parallelportfoliotable"))
    report.append(pl.utils.bold(" "))  # Trick to force a white space
    report.append("shows the performance of the portfolio on the test set(s).")
    tabular = pl.Tabular("r|rrrr")
    tabular.add_row(["Solver", objective, "# Timeouts", "# Cancelled", "# Best"])
    tabular.add_hline()
    solver_performance = {
        solver: round(performance, MAX_DEC)
        for solver, _, performance in scenario.get_solver_ranking(objective=objective)
    }
    for solver in scenario.solvers:
        tabular.add_row(
            solver,
            solver_performance[solver],
            solver_timeout_count[solver],
            solver_cancelled_count[solver],
            best_solver_count[solver],
        )
    tabular.add_hline()
    portfolio_performance = round(
        scenario.best_performance(objective=objective), MAX_DEC
    )
    tabular.add_row(
        portfolio_name,
        portfolio_performance,
        sum(solver_timeout_count.values()),
        sum(solver_cancelled_count.values()),
        sum(best_solver_count.values()),
    )
    table_portfolio = pl.Table(position="h")
    table_portfolio.append(pl.UnsafeCommand("centering"))
    table_portfolio.append(tabular)
    table_portfolio.add_caption("Parallel Portfolio Performance")
    table_portfolio.append(pl.UnsafeCommand(r"label{tab:parallelportfoliotable}"))
    report.append(table_portfolio)

    # 4. Create scatter plot analysis between the portfolio and the single best solver
    sbs_name = scenario.get_solver_ranking(objective=objective)[0][0]
    sbs_instance_performance = scenario.get_value(
        solver=sbs_name, objective=objective.name
    )
    sbs_name = Path(sbs_name).name
    report.append(latex.AutoRef("fig:portfoliovssbs"))
    report.append(pl.utils.bold(" "))  # Trick to force a white space
    report.append(
        "shows the empirical comparison between the portfolio and the single "
        f"best solver (SBS) {sbs_name}."
    )
    portfolio_instance_performance = scenario.best_instance_performance(
        objective=objective.name
    ).tolist()

    df = pd.DataFrame(
        [sbs_instance_performance, portfolio_instance_performance],
        index=[f"SBS ({sbs_name}) Performance", "Portfolio Performance"],
        dtype=float,
    ).T
    plot = latex.comparison_plot(df, None)
    plot_path = plot_dir / f"sbs_{sbs_name}_vs_parallel_portfolio.pdf"
    plot.write_image(plot_path, width=500, height=500)
    with report.create(pl.Figure(position="h")) as figure:
        figure.add_image(
            str(plot_path.relative_to(report_dir)),
            width=pl.utils.NoEscape(r"0.6\textwidth"),
        )
        figure.add_caption(f"Portfolio vs SBS Performance ({objective})")
        figure.append(pl.UnsafeCommand(r"label{fig:portfoliovssbs}"))


def append_dataframe_longtable(
    report: pl.Document,
    df: pd.DataFrame,
    caption: str,
    label: str,
    max_cols: int = MAX_COLS_PER_TABLE,
    wide_threshold: int = WIDE_TABLE_THRESHOLD,
    num_keys: int = NUM_KEYS_PDF,
) -> None:
    """Append a pandas DataFrame to a PyLaTeX document as one or more LaTeX longtables.

    Args:
        report: The PyLaTeX document to which the table(s) will be appended.
        df: The DataFrame to be rendered as LaTeX longtable(s).
        caption: The caption for the table(s).
        label: The LaTeX label for referencing the table(s).
        max_cols: Maximum number of value columns per table chunk.
            Defaults to MAX_COLS_PER_TABLE.
        wide_threshold: Number of columns above which the table is rotated
            to landscape. Defaults to WIDE_TABLE_THRESHOLD.
        num_keys: Number of key columns to include in each table chunk.
            Defaults to NUM_KEYS_PDF.

    Returns:
        None
    """
    import math
    from typing import Union

    def latex_escape_text(s: str) -> str:
        """Escape special LaTeX characters in a string."""
        # Escape the text, but insert our own LaTeX macros around it
        return (
            s.replace("\\", r"\textbackslash{}")
            .replace("&", r"\&")
            .replace("%", r"\%")
            .replace("$", r"\$")
            .replace("#", r"\#")
            .replace("_", r"\_")
            .replace("{", r"\{")
            .replace("}", r"\}")
            .replace("~", r"\textasciitilde{}")
            .replace("^", r"\textasciicircum{}")
        )
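    # A minimal illustration (not executed): latex_escape_text("50%_run")
    # yields r"50\%\_run", which is safe to embed in a LaTeX table cell.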

    def last_path_segment(text: str) -> str:
        """Keep only the last non-empty path-like segment.

        Handles both back and forward slashes and removes any
        leading/trailing slashes.
        """
        t = str(text).strip().replace("\\", "/")
        parts = [p for p in t.split("/") if p]  # Ignore empty segments
        return parts[-1] if parts else ""
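    # For example, last_path_segment("C:\\runs\\solver_a/") returns "solver_a":
    # backslashes are normalised to forward slashes, empty segments dropped.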

    def wrap_fixed_shortstack(cell: str, width: int = MAX_CELL_LEN) -> str:
        """Wrap long text to a fixed width for LaTeX tables."""
        string_cell = last_path_segment(cell)
        if len(string_cell) <= width:
            return latex_escape_text(string_cell)
        chunks = [
            latex_escape_text(string_cell[index : index + width])
            for index in range(0, len(string_cell), width)
        ]
        # Left-aligned shortstack: forces line breaks and grows the row height
        return r"\shortstack[l]{" + r"\\ ".join(chunks) + "}"
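    # E.g. with width=5, "abcdefgh" becomes r"\shortstack[l]{abcde\\ fgh}",
    # which LaTeX renders as two left-aligned lines within a single cell.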

    def wrap_header_labels(
        df: pd.DataFrame, width_per_cell: int = MAX_CELL_LEN
    ) -> pd.DataFrame:
        """Wrap long header labels to a fixed width for LaTeX tables."""
        df_copy = df.copy()
        if isinstance(df_copy.columns, pd.MultiIndex):
            new_cols = []
            for tup in df_copy.columns:
                new_cols.append(
                    tuple(
                        wrap_fixed_shortstack(last_path_segment(index), width_per_cell)
                        if isinstance(index, str)
                        else index
                        for index in tup
                    )
                )
            names = [
                (
                    wrap_fixed_shortstack(last_path_segment(name), width_per_cell)
                    if isinstance(name, str)
                    else name
                )
                for name in (df_copy.columns.names or [])
            ]
            df_copy.columns = pd.MultiIndex.from_tuples(new_cols, names=names)
        else:
            df_copy.columns = [
                wrap_fixed_shortstack(last_path_segment(column), width_per_cell)
                if isinstance(column, str)
                else column
                for column in df_copy.columns
            ]
        return df_copy
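    # E.g. a (hypothetical) column label "Output/Solvers/MiniSAT" is reduced
    # to "MiniSAT" and, if longer than width_per_cell, split across lines.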

    def format_cell(cell: Union[int, float, str]) -> str:
        """Format a cell for printing in a LaTeX table."""
        try:
            float_cell = float(cell)
        except (TypeError, ValueError):
            return wrap_fixed_shortstack(last_path_segment(str(cell)), MAX_CELL_LEN)

        if not math.isfinite(float_cell):
            return "NaN"

        if float_cell.is_integer():
            return str(int(float_cell))
        # Round to MAX_DEC, then strip trailing zeros
        s = f"{round(float_cell, MAX_DEC):.{MAX_DEC}f}".rstrip("0").rstrip(".")
        return s
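    # A sketch of the resulting formatting (with MAX_DEC = 4):
    #   format_cell(3.0)          -> "3"
    #   format_cell(3.14159)      -> "3.1416"
    #   format_cell(float("nan")) -> "NaN"
    #   format_cell("a/b/c")      -> "c"  (wrapped when longer than MAX_CELL_LEN)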

    df_copy = df.copy()

    # In order to be able to show the key columns, we need to reset the index
    if not isinstance(df_copy.index, pd.RangeIndex) and df_copy.index.name in (
        None,
        "index",
        "",
    ):
        df_copy = df_copy.reset_index()

    # Remove the Seed column from the performance dataframe since it is not
    # very informative and clutters the table
    if isinstance(df, PerformanceDataFrame):
        mask = df_copy.columns.get_level_values("Meta") == "Seed"
        df_copy = df_copy.loc[:, ~mask]

    # For the performance dataframe, we want to show the values of the objectives
    # with their corresponding instance and run. Since objective, instance and run
    # are indexes in the performance dataframe, the reset above turns them into
    # columns. We call them key columns, since they are the key that identifies
    # the value of an objective for a given instance and run. (Respectively
    # FeatureGroup, FeatureName and Extractor in the feature dataframe.)
    keys = df_copy.iloc[:, :num_keys]  # Key columns

    # Split the dataframe into chunks of max_cols value columns per page
    number_column_chunks = max((df_copy.shape[1] - 1) // max_cols + 1, 1)
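    # E.g. 3 key columns plus 8 value columns (11 in total) with max_cols=2
    # gives number_column_chunks = 6; the surplus chunks hold no value columns,
    # and the key-columns-only guard inside the loop breaks before rendering them.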

    for i in range(number_column_chunks):
        report.append(NewPage())
        start_col = i * max_cols
        end_col = (i + 1) * max_cols

        # Select the value columns for this chunk
        values = df_copy.iloc[
            :,
            start_col + num_keys : end_col + num_keys,
        ]

        # Concatenate the key and value columns
        full_part = pd.concat([keys, values], axis=1)

        # If there are no value columns left, we are done
        if full_part.shape[1] <= num_keys:
            break

        full_part_wrapped = wrap_header_labels(full_part, MAX_CELL_LEN)

        # Tell pandas how to print numbers
        formatters = {col: format_cell for col in full_part_wrapped.columns}

        tex = full_part_wrapped.to_latex(
            longtable=True,
            index=False,
            escape=False,  # We want to split the long words, not escape them
            caption=caption + (f" (part {i + 1})" if number_column_chunks > 1 else ""),
            label=(label + f"-p{i + 1}") if number_column_chunks > 1 else label,
            float_format=None,
            multicolumn=True,
            multicolumn_format="c",
            multirow=False,
            column_format="c" * full_part_wrapped.shape[1],
            formatters=formatters,
        )

        # Centre the whole table horizontally
        centred_tex = "\\begin{center}\n" + tex + "\\end{center}\n"

        # Rotate if still too wide
        if full_part_wrapped.shape[1] > wide_threshold:
            report.append(NoEscape(r"\begin{landscape}"))
            report.append(NoEscape(centred_tex))
            report.append(NoEscape(r"\end{landscape}"))
        else:
            report.append(NoEscape(centred_tex))


def generate_appendix(
    report: pl.Document,
    performance_data: PerformanceDataFrame,
    feature_data: FeatureDataFrame,
) -> None:
    """Generate the appendix containing the raw data tables.

    Args:
        report: The LaTeX document object to which the appendix will be added.
        performance_data: The performance data to be included in the appendix.
        feature_data: The feature data to be included in the appendix.

    Returns:
        None
    """
    # Preamble
    for pkg in ("longtable", "pdflscape", "caption", "booktabs", "placeins"):
        p = pl.Package(pkg)
        if p not in report.packages:
            report.packages.append(p)

    report.append(pl.NewPage())
    report.append(pl.NoEscape(r"\clearpage"))
    report.append(pl.NoEscape(r"\FloatBarrier"))
    report.append(pl.UnsafeCommand("appendix"))
    report.append(pl.Section("Performance DataFrame"))

    append_dataframe_longtable(
        report,
        performance_data,
        caption="Performance DataFrame",
        label="tab:perf_data",
        max_cols=MAX_COLS_PER_TABLE,
        wide_threshold=WIDE_TABLE_THRESHOLD,
        num_keys=NUM_KEYS_PDF,
    )

    report.append(pl.Section("Feature DataFrame"))
    append_dataframe_longtable(
        report,
        feature_data,
        caption="Feature DataFrame",
        label="tab:feature_data",
        max_cols=MAX_COLS_PER_TABLE,
        wide_threshold=WIDE_TABLE_THRESHOLD,
        num_keys=NUM_KEYS_FDF,
    )

    report.append(pl.NoEscape(r"\FloatBarrier"))


def main(argv: list[str]) -> None:
    """Generate a report for executed experiments in the platform."""
    # Log command call
    sl.log_command(sys.argv, gv.settings().random_state)

    # Define command line arguments
    parser = parser_function()

    # Process command line arguments
    args = parser.parse_args(argv)

    performance_data = PerformanceDataFrame(gv.settings().DEFAULT_performance_data_path)
    feature_data = FeatureDataFrame(gv.settings().DEFAULT_feature_data_path)

    # Fetch all known scenarios
    configuration_scenarios = gv.configuration_scenarios(refresh=True)
    selection_scenarios = gv.selection_scenarios(refresh=True)
    parallel_portfolio_scenarios = gv.parallel_portfolio_scenarios()

    # Filter scenarios based on args
    if args.solvers:
        solvers = [
            resolve_object_name(
                s, gv.solver_nickname_mapping, gv.settings().DEFAULT_solver_dir, Solver
            )
            for s in args.solvers
        ]
        configuration_scenarios = [
            scenario
            for scenario in configuration_scenarios
            if scenario.solver.directory in [solver.directory for solver in solvers]
        ]
        selection_scenarios = [
            scenario
            for scenario in selection_scenarios
            if set(scenario.solvers).intersection(
                [str(solver.directory) for solver in solvers]
            )
        ]
        parallel_portfolio_scenarios = [
            scenario
            for scenario in parallel_portfolio_scenarios
            if set(scenario.solvers).intersection(
                [str(solver.directory) for solver in solvers]
            )
        ]
    if args.instance_sets:
        instance_sets = [
            resolve_object_name(
                s,
                gv.instance_set_nickname_mapping,
                gv.settings().DEFAULT_instance_dir,
                Instance_Set,
            )
            for s in args.instance_sets
        ]
        configuration_scenarios = [
            scenario
            for scenario in configuration_scenarios
            if scenario.instance_set.directory
            in [instance_set.directory for instance_set in instance_sets]
        ]
        selection_scenarios = [
            scenario
            for scenario in selection_scenarios
            if set(scenario.instance_sets).intersection(
                [str(instance_set.name) for instance_set in instance_sets]
            )
        ]
        parallel_portfolio_scenarios = [
            scenario
            for scenario in parallel_portfolio_scenarios
            if set(scenario.instance_sets).intersection(
                [str(instance_set.name) for instance_set in instance_sets]
            )
        ]

    processed_configuration_scenarios = []
    processed_selection_scenarios = []
    possible_test_sets = [
        Instance_Set(p) for p in gv.settings().DEFAULT_instance_dir.iterdir()
    ]
    for configuration_scenario in configuration_scenarios:
        processed_configuration_scenarios.append(
            (
                ConfigurationOutput(
                    configuration_scenario, performance_data, possible_test_sets
                ),
                configuration_scenario,
            )
        )
    for selection_scenario in selection_scenarios:
        processed_selection_scenarios.append(
            (SelectionOutput(selection_scenario), selection_scenario)
        )

    raw_output = gv.settings().DEFAULT_output_analysis / "JSON"
    if raw_output.exists():  # Clean
        shutil.rmtree(raw_output)
    raw_output.mkdir()

    # Write JSON
    output_json = {}
    for output, configuration_scenario in processed_configuration_scenarios:
        output_json[configuration_scenario.name] = output.serialise()
    for output, selection_scenario in processed_selection_scenarios:
        output_json[selection_scenario.name] = output.serialise()
    # TODO: We do not have an output object for parallel portfolios

    raw_output_json = raw_output / "output.json"
    with raw_output_json.open("w") as f:
        json.dump(output_json, f, indent=4)

    print(f"Machine readable output written to: {raw_output_json}")

    if args.only_json:  # Done
        sys.exit(0)

    # TODO: Group scenarios based on:
    # - Configuration / Selection / Parallel Portfolio
    # - Training Instance Set / Testing Instance Set
    # - Configurators can be merged as long as we can match their budgets clearly
    report_directory = gv.settings().DEFAULT_output_analysis / "report"
    if report_directory.exists():  # Clean it
        shutil.rmtree(report_directory)
    report_directory.mkdir()
    target_path = report_directory / "report"
    report = pl.document.Document(
        default_filepath=str(target_path), document_options=["british"]
    )
    bibpath = gv.settings().bibliography_path
    newbibpath = report_directory / "report.bib"
    shutil.copy(bibpath, newbibpath)
    # BUGFIX for unknown package load in PyLaTeX
    p = pl.package.Package("lastpage")
    if p in report.packages:
        report.packages.remove(p)
    report.packages.append(
        pl.package.Package(
            "geometry",
            options=[
                "verbose",
                "tmargin=3.5cm",
                "bmargin=3.5cm",
                "lmargin=3cm",
                "rmargin=3cm",
            ],
        )
    )
    # Unsafe command for \emph{Sparkle}
    report.preamble.extend(
        [
            pl.UnsafeCommand("title", r"\emph{Sparkle} Algorithm Portfolio report"),
            pl.UnsafeCommand(
                "author",
                r"Generated by \emph{Sparkle} "
                f"(version: {__sparkle_version__})",
            ),
        ]
    )
    report.append(pl.Command("maketitle"))
    report.append(pl.Section("Introduction"))
    # TODO: Add a quick overview to the introduction of what is considered in the
    # report regarding Solvers, Instance Sets and Feature Extractors
    report.append(
        pl.UnsafeCommand(
            r"emph{Sparkle}~\cite{Hoos15} is a multi-agent problem-solving platform "
            r"based on Programming by Optimisation (PbO)~\cite{Hoos12}. It provides "
            "a number of effective algorithm optimisation techniques (such as "
            "automated algorithm configuration, portfolio-based algorithm selection, "
            "etc.) to accelerate existing solvers."
        )
    )

    for scenario_output, scenario in processed_configuration_scenarios:
        generate_configuration_section(report, scenario, scenario_output)

    for scenario_output, scenario in processed_selection_scenarios:
        generate_selection_section(report, scenario, scenario_output)

    for parallel_dataframe in parallel_portfolio_scenarios:
        generate_parallel_portfolio_section(report, parallel_dataframe)

    # Check if the user wants to add the appendix
    settings = gv.settings(args)
    if settings.appendices:
        generate_appendix(report, performance_data, feature_data)

    # Add the bibliography
    report.append(pl.NewPage())  # Ensure it starts on a new page
    report.append(pl.Command("bibliographystyle", arguments=["plain"]))
    report.append(pl.Command("bibliography", arguments=[str(newbibpath)]))
    # Generate the report .tex and .pdf
    report.generate_pdf(target_path, clean=False, clean_tex=False, compiler="pdflatex")
    # TODO: This should be done by PyLaTeX. Generate the bib and regenerate the report.
    # Reference for the (terrible) solution: https://tex.stackexchange.com/
    # questions/63852/question-mark-or-bold-citation-key-instead-of-citation-number
    import subprocess

    # Run BibTeX silently
    subprocess.run(
        ["bibtex", newbibpath.with_suffix("")],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    report.generate_pdf(target_path, clean=False, clean_tex=False, compiler="pdflatex")
    report.generate_pdf(target_path, clean=False, clean_tex=False, compiler="pdflatex")
    print(f"Report generated at {target_path}.pdf")
    sys.exit(0)


if __name__ == "__main__":
    main(sys.argv[1:])