#!/usr/bin/env python3
"""Sparkle command to generate a report for an executed experiment."""

import sys
import shutil
import argparse
from pathlib import Path
import time
import json
import pandas as pd

from pylatex import NoEscape, NewPage
import pylatex as pl
from sparkle import __version__ as __sparkle_version__

from sparkle.CLI.help import global_variables as gv
from sparkle.CLI.help import resolve_object_name
from sparkle.CLI.help import logging as sl
from sparkle.CLI.help import argparse_custom as ac

from sparkle.solver import Solver
from sparkle.instance import Instance_Set
from sparkle.selector import Extractor
from sparkle.structures import PerformanceDataFrame, FeatureDataFrame
from sparkle.configurator.configurator import ConfigurationScenario
from sparkle.selector.selector import SelectionScenario
from sparkle.types import SolverStatus
from sparkle.platform import Settings

from sparkle.platform import latex
from sparkle.platform.output.configuration_output import ConfigurationOutput
from sparkle.platform.output.selection_output import SelectionOutput


MAX_DEC = 4  # Maximum decimals used for each reported value
MAX_COLS_PER_TABLE = 2  # Value columns per table chunk, in addition to the key columns
WIDE_TABLE_THRESHOLD = 4  # Columns above which we switch to landscape
NUM_KEYS_PDF = 3  # Key columns of the PerformanceDataFrame (Objective, Instance, Run)
NUM_KEYS_FDF = 3  # Key columns of the FeatureDataFrame (Group, Name, Extractor)
MAX_CELL_LEN = 17  # Maximum characters per table cell before wrapping
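# Illustrative example of how these constants interact: a dataframe with 3 key
# columns and 5 value columns is rendered as 3 longtable chunks of at most
# MAX_COLS_PER_TABLE value columns each; chunks with 3 + 2 = 5 total columns
# exceed WIDE_TABLE_THRESHOLD = 4 and are rotated to landscape.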


def parser_function() -> argparse.ArgumentParser:
    """Define the command line arguments."""
    parser = argparse.ArgumentParser(
        description="Generates a report for all known selection, configuration "
        "and parallel portfolio scenarios.",
        epilog="If you wish to filter for specific solvers, instance sets, etc., "
        "have a look at the command line arguments.",
    )
    # Add argument for filtering solvers
    parser.add_argument(
        *ac.SolversReportArgument.names, **ac.SolversReportArgument.kwargs
    )
    # Add argument for filtering instance sets
    parser.add_argument(
        *ac.InstanceSetsReportArgument.names, **ac.InstanceSetsReportArgument.kwargs
    )

    # Add argument for toggling the appendices
    parser.add_argument(
        *Settings.OPTION_appendices.args, **Settings.OPTION_appendices.kwargs
    )

    # Add argument for filtering configurators?
    # Add argument for filtering selectors?
    # Add argument for filtering ??? scenario ids? configuration ids?
    parser.add_argument(*ac.GenerateJSONArgument.names, **ac.GenerateJSONArgument.kwargs)
    return parser
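
# Example invocation (illustrative sketch; the exact flag spellings are defined
# by argparse_custom and Settings, so the flags below are assumptions):
#   $ python -m sparkle.CLI.generate_report --solvers PbO-CCSAT --instance-sets PTN
#   $ python -m sparkle.CLI.generate_report --only-json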


def generate_configuration_section(
    report: pl.Document,
    scenario: ConfigurationScenario,
    scenario_output: ConfigurationOutput,
) -> None:
    """Generate a section for a configuration scenario."""
    report_dir = Path(report.default_filepath).parent
    time_stamp = time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))
    plot_dir = (
        report_dir
        / f"{scenario.configurator.__name__}_{scenario.name}_plots_{time_stamp}"
    )
    plot_dir.mkdir(exist_ok=True)

    # 1. Write section intro
    report.append(
        pl.Section(
            f"{scenario.configurator.__name__} Configuration: "
            f"{scenario.solver.name} on {scenario.instance_set.name}"
        )
    )
    report.append("In this scenario, ")
    report.append(
        pl.UnsafeCommand(
            f"textbf{{{scenario.configurator.__name__}}} "
            f"({scenario.configurator.full_name})~\\cite"
            f"{{{scenario.configurator.__name__}}} with version "
            f"{scenario.configurator.version} was used for configuration. "
        )
    )
    report.append(
        f"The Solver {scenario.solver} was optimised on the training set "
        f"{scenario.instance_set}. The scenario was run {scenario.number_of_runs} "
        f"times independently with different seeds, yielding {scenario.number_of_runs} "
        f"configurations. The cutoff time for the solver was set to "
        f"{scenario.solver_cutoff_time} seconds. The optimised objective is "
        f"{scenario.sparkle_objectives[0]}. Each configuration was evaluated on the "
        "training set to determine the best configuration, i.e. the one with the best "
        f"{scenario.sparkle_objectives[0]} value on the training set."
    )

    # 2. Report all the configurator settings in table format
    report.append(pl.Subsection("Configurator Settings"))
    report.append(
        f"The following settings were used for {scenario.configurator.__name__}:\n"
    )
    tabular = pl.Tabular("l|r")
    tabular.add_row("Setting", "Value")
    tabular.add_hline()
    for setting, value in scenario.serialise().items():
        # Keep only the last path segment for path-like values, otherwise
        # the table gets too wide and we can't see the other values
        t = str(value).strip().replace("\\", "/")
        parts = [p for p in t.split("/") if p]
        if parts:  # Guard against empty values, which would crash parts[-1]
            tabular.add_row([setting, parts[-1]])
        else:
            tabular.add_row([setting, "None"])
    table_conf_settings = pl.Table(position="h")
    table_conf_settings.append(pl.UnsafeCommand("centering"))
    table_conf_settings.append(tabular)
    table_conf_settings.add_caption("Configurator Settings")
    report.append(table_conf_settings)

    # 3. Report details on the instances and solver used
    report.append(pl.Subsection("Solver & Instance Set(s) Details"))
    cs = scenario_output.solver.get_configuration_space()
    report.append(
        f"The solver {scenario_output.solver} was configured using "
        f"{len(cs.values())} configurable (hyper)parameters. "
        f"The configuration space has {len(cs.conditions)} conditions. "
    )
    report.append("The following instance sets were used for the scenario:")
    with report.create(pl.Itemize()) as instance_set_latex_list:
        for instance_set in [
            scenario_output.instance_set_train
        ] + scenario_output.test_instance_sets:
            instance_set_name = instance_set.name.replace("_", " ")  # Latex fix
            instance_set_latex_list.add_item(
                pl.UnsafeCommand(
                    f"textbf{{{instance_set_name}}} ({instance_set.size} instances)"
                )
            )

    # Function to generate a results summary of default vs best on an instance set
    def instance_set_summary(instance_set_name: str) -> None:
        """Generate a results summary of default vs best on an instance set."""
        instance_set_results = scenario_output.instance_set_results[instance_set_name]
        report.append(
            f"The {scenario.sparkle_objectives[0]} value of the Default "
            f"Configuration on {instance_set_name} was "
        )
        report.append(
            pl.UnsafeCommand(
                f"textbf{{{round(instance_set_results.default_performance, MAX_DEC)}}}.\n"
            )
        )
        report.append(
            f"The {scenario.sparkle_objectives[0]} value of the Best "
            f"Configuration on {instance_set_name} was "
        )
        report.append(
            pl.UnsafeCommand(
                f"textbf{{{round(instance_set_results.best_performance, MAX_DEC)}}}.\n"
            )
        )
        report.append("In ")
        report.append(latex.AutoRef(f"fig:bestvsdefault{instance_set_name}{time_stamp}"))
        report.append(pl.utils.bold(" "))  # Force white space
        report.append("the results are plotted per instance.")

        # Create graph to compare best configuration vs default on the instance set
        df = pd.DataFrame(
            [
                instance_set_results.default_instance_performance,
                instance_set_results.best_instance_performance,
            ],
            index=["Default Configuration", "Best Configuration"],
            dtype=float,
        ).T
        plot = latex.comparison_plot(df, None)
        plot_path = (
            plot_dir / f"{scenario_output.best_configuration_key}_vs_"
            f"Default_{instance_set_name}.pdf"
        )
        plot.write_image(plot_path, width=500, height=500)
        with report.create(pl.Figure(position="h")) as figure:
            figure.add_image(
                str(plot_path.relative_to(report_dir)),
                width=pl.utils.NoEscape(r"0.6\textwidth"),
            )
            figure.add_caption(
                f"Best vs Default Performance on {instance_set_name} "
                f"({scenario.sparkle_objectives[0]})"
            )
            figure.append(
                pl.UnsafeCommand(
                    r"label{"
                    f"fig:bestvsdefault{instance_set_name}{time_stamp}"
                    r"}"
                )
            )
        if scenario.sparkle_objectives[0].time:  # Write status table
            report.append("The following solver statuses were found per instance:")
            tabular = pl.Tabular("l|c|c|c")
            tabular.add_row("Status", "Default", "Best", "Overlap")
            tabular.add_hline()
            # Count the statuses
            for status in SolverStatus:
                default_count, best_count, overlap_count = 0, 0, 0
                for instance in instance_set_results.instance_status_default.keys():
                    instance = str(instance)
                    default_hit = (
                        instance_set_results.instance_status_default[instance] == status
                    )
                    best_hit = (
                        instance_set_results.instance_status_best[instance] == status
                    )
                    default_count += default_hit
                    best_count += best_hit
                    overlap_count += default_hit and best_hit
                if default_count or best_count:
                    tabular.add_row(status, default_count, best_count, overlap_count)
            table_status_values = pl.Table(position="h")
            table_status_values.append(pl.UnsafeCommand("centering"))
            table_status_values.append(tabular)
            table_status_values.add_caption(
                "Status count for the best and default configuration."
            )
            report.append(table_status_values)
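
            # Reading the table (illustrative example): if the default
            # configuration times out on instances A and B while the best
            # configuration only times out on B, the TIMEOUT row reads
            # Default=2, Best=1, Overlap=1.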

    # 4. Report the results of the best configuration on the training set vs the default
    report.append(
        pl.Subsection(
            f"Comparison of Default and Best Configuration on Training Set "
            f"{scenario_output.instance_set_train.name}"
        )
    )
    instance_set_summary(scenario_output.instance_set_train.name)

    # 5. Report the actual config values
    report.append(pl.Subsubsection("Best Configuration Values"))
    if (
        scenario_output.best_configuration_key
        == PerformanceDataFrame.default_configuration
    ):
        report.append(
            "The configurator failed to find a better configuration than the "
            "default configuration on the training set in this scenario."
        )
    else:
        report.append(
            "The following parameter values "
            "were found to be the best on the training set:\n"
        )
        tabular = pl.Tabular("l|r")
        tabular.add_row("Parameter", "Value")
        tabular.add_hline()
        for parameter, value in scenario_output.best_configuration.items():
            tabular.add_row([parameter, str(value)])
        table_best_values = pl.Table(position="h")
        table_best_values.append(pl.UnsafeCommand("centering"))
        table_best_values.append(tabular)
        table_best_values.add_caption("Best found configuration values")
        report.append(table_best_values)

    # 6. Report the results of best vs default configuration on the test sets
    for test_set in scenario_output.test_instance_sets:
        report.append(
            pl.Subsection(
                f"Comparison of Default and Best Configuration on Test Set "
                f"{test_set.name}"
            )
        )
        instance_set_summary(test_set.name)

    # 7. Report the parameter ablation scenario if present
    if scenario.ablation_scenario:
        report.append(pl.Subsection("Parameter importance via Ablation"))
        report.append("Ablation analysis ")
        report.append(pl.UnsafeCommand(r"cite{FawcettHoos16} "))
        test_set = scenario.ablation_scenario.test_set
        if not scenario.ablation_scenario.test_set:
            test_set = scenario.ablation_scenario.train_set
        report.append(
            f"is performed from the default configuration of {scenario.solver} to the "
            f"best found configuration ({scenario_output.best_configuration_key}) "
            "to see which parameter changes between them contribute most to the "
            "improved performance. The ablation path uses the training set "
            f"{scenario.ablation_scenario.train_set.name} and validation is performed "
            f"on the test set {test_set.name}. The set of parameters that differ "
            "between the two configurations forms the ablation path. Starting from "
            "the default configuration, the path is computed by performing a sequence "
            "of rounds. In a round, each available parameter is flipped in the "
            "configuration and validated on its performance. The flipped parameter "
            "with the best performance in that round is added to the configuration, "
            "and the next round starts with the remaining parameters. This repeats "
            "until all parameters have been flipped, at which point the configuration "
            "equals the best found configuration. The analysis resulted in the "
            "ablation presented in "
        )
        report.append(latex.AutoRef("tab:ablationtable"))
        report.append(".")

        # Add ablation table
        tabular = pl.Tabular("r|l|r|r|r")
        data = scenario.ablation_scenario.read_ablation_table()
        for index, row in enumerate(data):
            tabular.add_row(*row)
            if index == 0:
                tabular.add_hline()
        table_ablation = pl.Table(position="h")
        table_ablation.append(pl.UnsafeCommand("centering"))
        table_ablation.append(tabular)
        table_ablation.add_caption("Ablation table")
        table_ablation.append(pl.UnsafeCommand(r"label{tab:ablationtable}"))
        report.append(table_ablation)


def generate_selection_section(
    report: pl.Document, scenario: SelectionScenario, scenario_output: SelectionOutput
) -> None:
    """Generate a section for a selection scenario."""
    report_dir = Path(report.default_filepath).parent
    time_stamp = time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))
    plot_dir = report_dir / f"{scenario.name.replace(' ', '_')}_plots_{time_stamp}"
    plot_dir.mkdir(exist_ok=True)
    report.append(
        pl.Section(
            f"Selection: {scenario.selector.model_class.__name__} on "
            f"{' '.join([s[0] for s in scenario_output.training_instance_sets])}"
        )
    )
    report.append(
        f"In this scenario, a {scenario.selector.model_class.__name__} "
        f"({scenario.selector.selector_class.__name__}) was trained on the "
        "performance and feature data using ASF-lib. The following solvers "
        f"were run with a cutoff time of {scenario.solver_cutoff} seconds:"
    )
    with report.create(pl.Itemize()) as solver_latex_list:
        for solver_name in scenario_output.solvers.keys():
            solver_name = solver_name.replace("_", " ")
            solver_latex_list.add_item(
                pl.UnsafeCommand(
                    f"textbf{{{solver_name}}} "
                    f"({len(scenario_output.solvers[solver_name])} configurations)"
                )
            )
    # Report training instance sets
    report.append("The following training instance sets were used:")
    with report.create(pl.Itemize()) as instance_set_latex_list:
        for training_set_name, set_size in scenario_output.training_instance_sets:
            training_set_name = training_set_name.replace("_", " ")  # Latex fix
            instance_set_latex_list.add_item(
                pl.UnsafeCommand(f"textbf{{{training_set_name}}} ({set_size} instances)")
            )
    # Report feature extractors
    report.append(
        "The following feature extractors were used with an extractor cutoff "
        f"time of {scenario.extractor_cutoff} seconds:"
    )
    with report.create(pl.Itemize()) as feature_extractor_latex_list:
        for feature_extractor_name in scenario.feature_extractors:
            extractor = resolve_object_name(
                feature_extractor_name,
                gv.file_storage_data_mapping[gv.extractor_nickname_list_path],
                gv.settings().DEFAULT_extractor_dir,
                class_name=Extractor,
            )
            feature_extractor_name = feature_extractor_name.replace("_", " ")  # Latex
            feature_extractor_latex_list.add_item(
                pl.UnsafeCommand(
                    f"textbf{{{feature_extractor_name}}} "
                    f"({extractor.output_dimension} features)"
                )
            )
    # Report training results
    report.append(pl.Subsection("Training Results"))
    # 1. Report VBS and selector performance, create ranking list of the solvers
    # TODO Add ref here to the training sets section?
    report.append(
        f"In this section, the {scenario.objective.name} results of the "
        "portfolio selector on the training instance set(s) listed above "
        "are reported. "
    )
    report.append(
        f"The {scenario.objective.name} value of the Virtual Best Solver "
        "(VBS), i.e., the perfect portfolio selector, is "
    )
    report.append(pl.utils.bold(f"{round(scenario_output.vbs_performance, MAX_DEC)}"))
    report.append(", the actual portfolio selector performance is ")
    report.append(
        pl.utils.bold(f"{round(scenario_output.actual_performance, MAX_DEC)}.\n")
    )

    report.append(
        f"Below, the solvers are ranked based on {scenario.objective.name} performance:"
    )
    with report.create(pl.Enumerate()) as ranking_list:
        for solver_name, conf_id, value in scenario_output.solver_performance_ranking:
            value = round(value, MAX_DEC)
            solver_name = solver_name.replace("_", " ")  # Latex fix
            conf_id = conf_id.replace("_", " ")  # Latex fix
            ranking_list.add_item(
                pl.UnsafeCommand(f"textbf{{{solver_name}}} ({conf_id}): {value}")
            )

    # 2. Marginal contribution ranking list VBS
    report.append(pl.Subsubsection("Marginal Contribution Ranking List"))
    report.append(
        "The following list shows the marginal contribution ranking list for the VBS:"
    )
    with report.create(pl.Enumerate()) as ranking_list:
        for (
            solver_name,
            conf_id,
            contribution,
            performance,
        ) in scenario_output.marginal_contribution_perfect:
            contribution, performance = (
                round(contribution, MAX_DEC),
                round(performance, MAX_DEC),
            )
            solver_name = solver_name.replace("_", " ")  # Latex fix
            conf_id = conf_id.replace("_", " ")  # Latex fix
            ranking_list.add_item(
                pl.UnsafeCommand(
                    f"textbf{{{solver_name}}} ({conf_id}): {contribution} ({performance})"
                )
            )

    # 3. Marginal contribution ranking list actual selector
    report.append(
        "The following list shows the marginal contribution ranking list for "
        "the actual portfolio selector:"
    )
    with report.create(pl.Enumerate()) as ranking_list:
        for (
            solver_name,
            conf_id,
            contribution,
            performance,
        ) in scenario_output.marginal_contribution_actual:
            contribution, performance = (
                round(contribution, MAX_DEC),
                round(performance, MAX_DEC),
            )
            solver_name = solver_name.replace("_", " ")  # Latex fix
            conf_id = conf_id.replace("_", " ")  # Latex fix
            ranking_list.add_item(
                pl.UnsafeCommand(
                    f"textbf{{{solver_name}}} ({conf_id}): {contribution} ({performance})"
                )
            )
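
    # Each ranking entry is a (solver, configuration id, marginal contribution,
    # performance) tuple; roughly, the marginal contribution expresses how much
    # the portfolio's performance degrades when that solver is left out.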

    # 4. Create scatter plot analysis
    report.append(pl.Subsubsection("Scatter Plot Analysis"))
    report.append(latex.AutoRef(f"fig:sbsvsselector{time_stamp}"))
    report.append(pl.utils.bold(" "))  # Trick to force a white space
    report.append(
        "shows the empirical comparison between the portfolio "
        "selector and the single best solver (SBS). "
    )
    report.append(latex.AutoRef("fig:vbsvsselector"))
    report.append(pl.utils.bold(" "))  # Trick to force a white space
    report.append(
        "shows the empirical comparison between the actual portfolio selector "
        "and the virtual best solver (VBS)."
    )
    # Create figure on SBS versus the selector
    sbs_name, sbs_config, _ = scenario_output.solver_performance_ranking[0]
    # sbs_plot_name = f"{Path(sbs_name).name} ({sbs_config})"
    sbs_performance = scenario_output.sbs_performance
    selector_performance = scenario_output.actual_performance_data

    # Join the data together
    df = pd.DataFrame(
        [sbs_performance, selector_performance],
        index=[f"{Path(sbs_name).name} ({sbs_config})", "Selector"],
        dtype=float,
    ).T
    plot = latex.comparison_plot(df, "Single Best Solver vs Selector")
    plot_path = (
        plot_dir / f"{Path(sbs_name).name}_{sbs_config}_vs_"
        f"Selector_{scenario.selector.model_class.__name__}.pdf"
    )
    plot.write_image(plot_path, width=500, height=500)
    with report.create(pl.Figure()) as figure:
        figure.add_image(
            str(plot_path.relative_to(report_dir)),
            width=pl.utils.NoEscape(r"0.6\textwidth"),
        )
        figure.add_caption(
            "Empirical comparison between the Single Best Solver and the Selector"
        )
        label = r"label{fig:sbsvsselector" + str(time_stamp) + r"}"
        figure.append(pl.UnsafeCommand(label))

    # Comparison between the actual portfolio selector in Sparkle and the VBS
    vbs_performance = scenario_output.vbs_performance_data.tolist()
    df = pd.DataFrame(
        [vbs_performance, selector_performance],
        index=["Virtual Best Solver", "Selector"],
        dtype=float,
    ).T
    plot = latex.comparison_plot(df, "Virtual Best Solver vs Selector")
    plot_path = (
        plot_dir
        / f"Virtual_Best_Solver_vs_Selector_{scenario.selector.model_class.__name__}.pdf"
    )
    plot.write_image(plot_path, width=500, height=500)
    with report.create(pl.Figure()) as figure:
        figure.add_image(
            str(plot_path.relative_to(report_dir)),
            width=pl.utils.NoEscape(r"0.6\textwidth"),
        )
        figure.add_caption(
            "Empirical comparison between the Virtual Best Solver and the Selector"
        )
        figure.append(pl.UnsafeCommand(r"label{fig:vbsvsselector}"))

    if scenario_output.test_sets:
        report.append(pl.Subsection("Test Results"))
        report.append("The following results are reported on the test set(s):")
        with report.create(pl.Itemize()) as latex_list:
            for test_set_name, test_set_size in scenario_output.test_sets:
                result = round(
                    scenario_output.test_set_performance[test_set_name], MAX_DEC
                )
                latex_list.add_item(
                    pl.UnsafeCommand(
                        f"textbf{{{test_set_name}}} ({test_set_size} instances): {result}"
                    )
                )


def generate_parallel_portfolio_section(
    report: pl.Document, scenario: PerformanceDataFrame
) -> None:
    """Generate a section for a parallel portfolio scenario."""
    report_dir = Path(report.default_filepath).parent
    portfolio_name = scenario.csv_filepath.parent.name
    time_stamp = time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))
    plot_dir = report_dir / f"{portfolio_name.replace(' ', '_')}_plots_{time_stamp}"
    plot_dir.mkdir()
    report.append(pl.Section(f"Parallel Portfolio {portfolio_name}"))
    report.append(
        "In this scenario, Sparkle runs the portfolio of Solvers on each instance in "
        "parallel with "
        f"{gv.settings().parallel_portfolio_num_seeds_per_solver} different "
        "seeds. The cutoff time for each solver run is set to "
        f"{gv.settings().solver_cutoff_time} seconds."
    )
    report.append(pl.Subsection("Solvers & Instance Sets"))
    report.append("The following Solvers were used in the portfolio:")
    # 1. Report on the Solvers and Instance Sets used for the portfolio
    with report.create(pl.Itemize()) as solver_latex_list:
        configs = scenario.configurations
        for solver in scenario.solvers:
            solver_name = solver.replace("_", " ")
            solver_latex_list.add_item(
                pl.UnsafeCommand(
                    f"textbf{{{solver_name}}} ({len(configs[solver])} configurations)"
                )
            )
    report.append("The following Instance Sets were used in the portfolio:")
    instance_sets = set(Path(instance).parent.name for instance in scenario.instances)
    instance_set_count = [
        len([i for i in scenario.instances if Path(i).parent.name == s])
        for s in instance_sets
    ]
    with report.create(pl.Itemize()) as instance_set_latex_list:
        for set_name, set_size in zip(instance_sets, instance_set_count):
            set_name = set_name.replace("_", " ")  # Latex fix
            instance_set_latex_list.add_item(
                pl.UnsafeCommand(f"textbf{{{set_name}}} ({set_size} instances)")
            )
    # 2. List which solver was the best on how many instances
    report.append(pl.Subsection("Portfolio Performance"))
    objective = scenario.objectives[0]
    report.append(
        f"The objective for the portfolio is {objective}. The solvers "
        "obtained the following performance over the instances: "
    )
    best_solver_count = {solver: 0 for solver in scenario.solvers}
    for instance in scenario.instances:
        ranking = scenario.get_solver_ranking(objective=objective, instances=[instance])
        best_solver_count[ranking[0][0]] += 1
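
    # Illustrative example: with solvers S1 and S2 and three instances on which
    # S1 ranks first twice and S2 once, best_solver_count == {"S1": 2, "S2": 1}.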

    with report.create(pl.Itemize()) as latex_list:
        for solver, count in best_solver_count.items():
            solver_name = solver.replace("_", " ")
            latex_list.add_item(
                pl.UnsafeCommand(
                    f"textbf{{{solver_name}}} was the best solver on {count} instance(s)."
                )
            )
    # TODO Report how many instances remained unsolved

    # 3. Create a table showing the performance of the portfolio and all solvers,
    # by showing the status counts and the number of times each solver was best
    solver_cancelled_count = {solver: 0 for solver in scenario.solvers}
    solver_timeout_count = {solver: 0 for solver in scenario.solvers}
    status_objective = [
        o for o in scenario.objective_names if o.lower().startswith("status")
    ][0]
    cancelled_status = [
        SolverStatus.UNKNOWN,
        SolverStatus.CRASHED,
        SolverStatus.WRONG,
        SolverStatus.ERROR,
        SolverStatus.KILLED,
    ]
    for solver in scenario.solvers:
        for status in scenario.get_value(solver=solver, objective=status_objective):
            status = SolverStatus(status)
            if status in cancelled_status:
                solver_cancelled_count[solver] += 1
            elif status == SolverStatus.TIMEOUT:
                solver_timeout_count[solver] += 1

    report.append(latex.AutoRef("tab:parallelportfoliotable"))
    report.append(pl.utils.bold(" "))  # Trick to force a white space
    report.append("shows the performance of the portfolio on the instance set(s).")
    tabular = pl.Tabular("r|rrrr")
    tabular.add_row(["Solver", objective, "# Timeouts", "# Cancelled", "# Best"])
    tabular.add_hline()
    solver_performance = {
        solver: round(performance, MAX_DEC)
        for solver, _, performance in scenario.get_solver_ranking(objective=objective)
    }
    for solver in scenario.solvers:
        tabular.add_row(
            solver,
            solver_performance[solver],
            solver_timeout_count[solver],
            solver_cancelled_count[solver],
            best_solver_count[solver],
        )
    tabular.add_hline()
    portfolio_performance = round(
        scenario.best_performance(objective=objective), MAX_DEC
    )
    tabular.add_row(
        portfolio_name,
        portfolio_performance,
        sum(solver_timeout_count.values()),
        sum(solver_cancelled_count.values()),
        sum(best_solver_count.values()),
    )
    table_portfolio = pl.Table(position="h")
    table_portfolio.append(pl.UnsafeCommand("centering"))
    table_portfolio.append(tabular)
    table_portfolio.add_caption("Parallel Portfolio Performance")
    table_portfolio.append(pl.UnsafeCommand(r"label{tab:parallelportfoliotable}"))
    report.append(table_portfolio)

    # 4. Create scatter plot analysis between the portfolio and the single best solver
    sbs_name = scenario.get_solver_ranking(objective=objective)[0][0]
    sbs_instance_performance = scenario.get_value(
        solver=sbs_name, objective=objective.name
    )
    sbs_name = Path(sbs_name).name
    report.append(latex.AutoRef("fig:portfoliovssbs"))
    report.append(pl.utils.bold(" "))  # Trick to force a white space
    report.append(
        "shows the empirical comparison between the portfolio and the single "
        f"best solver (SBS) {sbs_name}."
    )
    portfolio_instance_performance = scenario.best_instance_performance(
        objective=objective.name
    ).tolist()

    df = pd.DataFrame(
        [sbs_instance_performance, portfolio_instance_performance],
        index=[f"SBS ({sbs_name}) Performance", "Portfolio Performance"],
        dtype=float,
    ).T
    plot = latex.comparison_plot(df, None)
    plot_path = plot_dir / f"sbs_{sbs_name}_vs_parallel_portfolio.pdf"
    plot.write_image(plot_path, width=500, height=500)
    with report.create(pl.Figure(position="h")) as figure:
        figure.add_image(
            str(plot_path.relative_to(report_dir)),
            width=pl.utils.NoEscape(r"0.6\textwidth"),
        )
        figure.add_caption(f"Portfolio vs SBS Performance ({objective})")
        figure.append(pl.UnsafeCommand(r"label{fig:portfoliovssbs}"))


def append_dataframe_longtable(
    report: pl.Document,
    df: pd.DataFrame,
    caption: str,
    label: str,
    max_cols: int = MAX_COLS_PER_TABLE,
    wide_threshold: int = WIDE_TABLE_THRESHOLD,
    num_keys: int = NUM_KEYS_PDF,
) -> None:
    """Appends a pandas DataFrame to a PyLaTeX document as one or more LaTeX longtables.

    Args:
        report: The PyLaTeX document to which the table(s) will be appended.
        df: The DataFrame to be rendered as LaTeX longtable(s).
        caption: The caption for the table(s).
        label: The LaTeX label for referencing the table(s).
        max_cols: Maximum number of value columns per table chunk.
            Defaults to MAX_COLS_PER_TABLE.
        wide_threshold: Number of columns above which the table is rotated
            to landscape. Defaults to WIDE_TABLE_THRESHOLD.
        num_keys: Number of key columns to include in each table chunk.
            Defaults to NUM_KEYS_PDF.

    Returns:
        None
    """
    import math
    from typing import Union

    def latex_escape_text(s: str) -> str:
        """Escape special LaTeX characters in a string."""
        # Replace the backslash with a placeholder first: substituting
        # \textbackslash{} directly would let the brace replacements below
        # mangle the braces it introduces.
        s = s.replace("\\", "\0")
        s = (
            s.replace("&", r"\&")
            .replace("%", r"\%")
            .replace("$", r"\$")
            .replace("#", r"\#")
            .replace("_", r"\_")
            .replace("{", r"\{")
            .replace("}", r"\}")
            .replace("~", r"\textasciitilde{}")
            .replace("^", r"\textasciicircum{}")
        )
        return s.replace("\0", r"\textbackslash{}")
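
    # e.g. latex_escape_text("50%_done") == r"50\%\_done"; a lone backslash
    # becomes r"\textbackslash{}" with its braces preserved, not re-escaped.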

    def last_path_segment(text: str) -> str:
        """Keep only the last non-empty path-like segment.

        Handles both backslashes and forward slashes and ignores any
        leading/trailing slashes.
        """
        t = str(text).strip().replace("\\", "/")
        parts = [p for p in t.split("/") if p]  # ignore empty segments
        return parts[-1] if parts else ""
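
    # e.g. last_path_segment("Solvers/PbO-CCSAT/") == "PbO-CCSAT"
    # and last_path_segment(r"C:\data\instances") == "instances"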

    def wrap_fixed_shortstack(cell: str, width: int = MAX_CELL_LEN) -> str:
        """Wrap long text to a fixed width for LaTeX tables."""
        string_cell = last_path_segment(cell)
        if len(string_cell) <= width:
            return latex_escape_text(string_cell)
        chunks = [
            latex_escape_text(string_cell[index : index + width])
            for index in range(0, len(string_cell), width)
        ]
        # Left-aligned shortstack: forces line breaks and grows the row height
        return r"\shortstack[l]{" + r"\\ ".join(chunks) + "}"
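
    # e.g. wrap_fixed_shortstack("abcdefgh", width=5)
    #   == r"\shortstack[l]{abcde\\ fgh}"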

    def wrap_header_labels(
        df: pd.DataFrame, width_per_cell: int = MAX_CELL_LEN
    ) -> pd.DataFrame:
        """Wrap long header labels to a fixed width for LaTeX tables."""
        df_copy = df.copy()
        if isinstance(df_copy.columns, pd.MultiIndex):
            new_cols = []
            for tup in df_copy.columns:
                new_cols.append(
                    tuple(
                        wrap_fixed_shortstack(last_path_segment(level), width_per_cell)
                        if isinstance(level, str)
                        else level
                        for level in tup
                    )
                )
            names = [
                (
                    wrap_fixed_shortstack(last_path_segment(name), width_per_cell)
                    if isinstance(name, str)
                    else name
                )
                for name in (df_copy.columns.names or [])
            ]
            df_copy.columns = pd.MultiIndex.from_tuples(new_cols, names=names)
        else:
            df_copy.columns = [
                wrap_fixed_shortstack(last_path_segment(column), width_per_cell)
                if isinstance(column, str)
                else column
                for column in df_copy.columns
            ]
        return df_copy

    def format_cell(cell: Union[int, float, str]) -> str:
        """Format a cell for printing in a LaTeX table."""
        try:
            float_cell = float(cell)
        except (TypeError, ValueError):
            return wrap_fixed_shortstack(last_path_segment(str(cell)), MAX_CELL_LEN)

        if not math.isfinite(float_cell):
            return "NaN"

        if float_cell.is_integer():
            return str(int(float_cell))
        # Round to MAX_DEC, then strip trailing zeros
        s = f"{round(float_cell, MAX_DEC):.{MAX_DEC}f}".rstrip("0").rstrip(".")
        return s
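
    # e.g. format_cell(3.14159265) == "3.1416", format_cell(2.0) == "2",
    # format_cell(float("inf")) == "NaN"; non-numeric strings are wrapped.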

    df_copy = df.copy()

    # In order to be able to show the key columns, we need to reset the index
    if not isinstance(df_copy.index, pd.RangeIndex) and df_copy.index.name in (
        None,
        "index",
        "",
    ):
        df_copy = df_copy.reset_index()

    # Remove the Seed column from the performance dataframe since it is not
    # very informative and clutters the table
    if isinstance(df, PerformanceDataFrame):
        mask = df_copy.columns.get_level_values("Meta") == "Seed"
        df_copy = df_copy.loc[:, ~mask]

    # For the performance dataframe, we want to show the objective values
    # together with their corresponding instance and run. Since objective,
    # instance and run are indexes of the performance dataframe, the
    # reset_index above turns them into regular columns. We call them key
    # columns, since they identify the value of an objective for a given
    # instance and run. (Respectively FeatureGroup, FeatureName and Extractor
    # in the feature dataframe.)
    keys = df_copy.iloc[:, :num_keys]  # Key columns

    # Split the dataframe into chunks of max_cols value columns per page
    number_column_chunks = max((df_copy.shape[1] - 1) // max_cols + 1, 1)
    for i in range(number_column_chunks):
        report.append(NewPage())
        start_col = i * max_cols
        end_col = (i + 1) * max_cols

        # Select the value columns for this chunk
        values = df_copy.iloc[
            :,
            start_col + num_keys : end_col + num_keys,
        ]

        # Concatenate the key and value columns
        full_part = pd.concat([keys, values], axis=1)

        # If there are no value columns left, we are done
        if full_part.shape[1] <= num_keys:
            break

        full_part_wrapped = wrap_header_labels(full_part, MAX_CELL_LEN)

        # Tell pandas how to print numbers
        formatters = {col: format_cell for col in full_part_wrapped.columns}

        tex = full_part_wrapped.to_latex(
            longtable=True,
            index=False,
            escape=False,  # We want to split the long words, not escape them
            caption=caption + (f" (part {i + 1})" if number_column_chunks > 1 else ""),
            label=label + f"-p{i + 1}" if number_column_chunks > 1 else label,
            float_format=None,
            multicolumn=True,
            multicolumn_format="c",
            multirow=False,
            column_format="c" * full_part_wrapped.shape[1],
            formatters=formatters,
        )

        # Centre the whole table horizontally
        centred_tex = "\\begin{center}\n" + tex + "\\end{center}\n"

        # Rotate if still too wide
        if full_part_wrapped.shape[1] > wide_threshold:
            report.append(NoEscape(r"\begin{landscape}"))
            report.append(NoEscape(centred_tex))
            report.append(NoEscape(r"\end{landscape}"))
        else:
            report.append(NoEscape(centred_tex))
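

# Minimal usage sketch for append_dataframe_longtable (hypothetical data; the
# real callers pass a PerformanceDataFrame / FeatureDataFrame, see
# generate_appendix below):
#   doc = pl.Document()
#   data = pd.DataFrame({"Objective": ["runtime"], "Instance": ["i1"],
#                        "Run": [1], "SolverA": [3.5], "SolverB": [7.0]})
#   append_dataframe_longtable(doc, data, caption="Example", label="tab:example")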


def generate_appendix(
    report: pl.Document,
    performance_data: PerformanceDataFrame,
    feature_data: FeatureDataFrame,
) -> None:
    """Add an appendix with the full performance and feature data to the report.

    Args:
        report: The LaTeX document object to which the appendix will be added.
        performance_data: The performance data to be included in the appendix.
        feature_data: The feature data to be included in the appendix.

    Returns:
        None
    """
    # Preamble packages needed for the longtables
    for pkg in ("longtable", "pdflscape", "caption", "booktabs", "placeins"):
        p = pl.Package(pkg)
        if p not in report.packages:
            report.packages.append(p)

    report.append(pl.NewPage())
    report.append(pl.NoEscape(r"\clearpage"))
    report.append(pl.NoEscape(r"\FloatBarrier"))
    report.append(pl.UnsafeCommand("appendix"))
    report.append(pl.Section("Performance DataFrame"))

    append_dataframe_longtable(
        report,
        performance_data,
        caption="Performance DataFrame",
        label="tab:perf_data",
        max_cols=MAX_COLS_PER_TABLE,
        wide_threshold=WIDE_TABLE_THRESHOLD,
        num_keys=NUM_KEYS_PDF,
    )

    report.append(pl.Section("Feature DataFrame"))
    append_dataframe_longtable(
        report,
        feature_data,
        caption="Feature DataFrame",
        label="tab:feature_data",
        max_cols=MAX_COLS_PER_TABLE,
        wide_threshold=WIDE_TABLE_THRESHOLD,
        num_keys=NUM_KEYS_FDF,
    )

    report.append(pl.NoEscape(r"\FloatBarrier"))


def main(argv: list[str]) -> None:
    """Generate a report for executed experiments in the platform."""
    # Log command call
    sl.log_command(sys.argv, gv.settings().random_state)

    # Define command line arguments
    parser = parser_function()

    # Process command line arguments
    args = parser.parse_args(argv)

    performance_data = PerformanceDataFrame(gv.settings().DEFAULT_performance_data_path)
    feature_data = FeatureDataFrame(gv.settings().DEFAULT_feature_data_path)

    # Fetch all known scenarios
    configuration_scenarios = gv.configuration_scenarios(refresh=True)
    selection_scenarios = gv.selection_scenarios(refresh=True)
    parallel_portfolio_scenarios = gv.parallel_portfolio_scenarios()

    # Filter scenarios based on args
    if args.solvers:
        solvers = [
            resolve_object_name(
                s, gv.solver_nickname_mapping, gv.settings().DEFAULT_solver_dir, Solver
            )
            for s in args.solvers
        ]
        solver_directories = [solver.directory for solver in solvers]
        solver_directory_names = [str(d) for d in solver_directories]
        configuration_scenarios = [
            s
            for s in configuration_scenarios
            if s.solver.directory in solver_directories
        ]
        selection_scenarios = [
            s
            for s in selection_scenarios
            if set(s.solvers).intersection(solver_directory_names)
        ]
        parallel_portfolio_scenarios = [
            s
            for s in parallel_portfolio_scenarios
            if set(s.solvers).intersection(solver_directory_names)
        ]
    if args.instance_sets:
        instance_sets = [
            resolve_object_name(
                s,
                gv.instance_set_nickname_mapping,
                gv.settings().DEFAULT_instance_dir,
                Instance_Set,
            )
            for s in args.instance_sets
        ]
        instance_set_directories = [i.directory for i in instance_sets]
        instance_set_names = [str(i.name) for i in instance_sets]
        configuration_scenarios = [
            s
            for s in configuration_scenarios
            if s.instance_set.directory in instance_set_directories
        ]
        selection_scenarios = [
            s
            for s in selection_scenarios
            if set(s.instance_sets).intersection(instance_set_names)
        ]
        parallel_portfolio_scenarios = [
            s
            for s in parallel_portfolio_scenarios
            if set(s.instance_sets).intersection(instance_set_names)
        ]

    processed_configuration_scenarios = []
    processed_selection_scenarios = []
    possible_test_sets = [
        Instance_Set(p) for p in gv.settings().DEFAULT_instance_dir.iterdir()
    ]
    for configuration_scenario in configuration_scenarios:
        processed_configuration_scenarios.append(
            (
                ConfigurationOutput(
                    configuration_scenario, performance_data, possible_test_sets
                ),
                configuration_scenario,
            )
        )
    for selection_scenario in selection_scenarios:
        processed_selection_scenarios.append(
            (SelectionOutput(selection_scenario), selection_scenario)
        )

    raw_output = gv.settings().DEFAULT_output_analysis / "JSON"
    if raw_output.exists():  # Clean
        shutil.rmtree(raw_output)
    raw_output.mkdir()

    # Write JSON
    output_json = {}
    for output, configuration_scenario in processed_configuration_scenarios:
        output_json[configuration_scenario.name] = output.serialise()
    for output, selection_scenario in processed_selection_scenarios:
        output_json[selection_scenario.name] = output.serialise()
    # TODO: We do not have an output object for parallel portfolios

    raw_output_json = raw_output / "output.json"
    with raw_output_json.open("w") as f:
        json.dump(output_json, f, indent=4)
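
    # The resulting JSON maps each scenario name to its serialised output,
    # roughly {"<scenario name>": {...}}; see ConfigurationOutput.serialise
    # and SelectionOutput.serialise for the exact fields.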

    print(f"Machine readable output written to: {raw_output_json}")

    if args.only_json:  # Done
        sys.exit(0)

    # TODO: Group scenarios based on:
    # - Configuration / Selection / Parallel Portfolio
    # - Training Instance Set / Testing Instance Set
    # - Configurators can be merged as long as we can match their budgets clearly
    report_directory = gv.settings().DEFAULT_output_analysis / "report"
    if report_directory.exists():  # Clean it
        shutil.rmtree(report_directory)
    report_directory.mkdir()
    target_path = report_directory / "report"
    report = pl.document.Document(
        default_filepath=str(target_path), document_options=["british"]
    )
    bibpath = gv.settings().bibliography_path
    newbibpath = report_directory / "report.bib"
    shutil.copy(bibpath, newbibpath)
    # BUGFIX for unknown package load in PyLatex
    p = pl.package.Package("lastpage")
    if p in report.packages:
        report.packages.remove(p)
    report.packages.append(
        pl.package.Package(
            "geometry",
            options=[
                "verbose",
                "tmargin=3.5cm",
                "bmargin=3.5cm",
                "lmargin=3cm",
                "rmargin=3cm",
            ],
        )
    )
    # Unsafe command needed for \emph{Sparkle}
    report.preamble.extend(
        [
            pl.UnsafeCommand("title", r"\emph{Sparkle} Algorithm Portfolio report"),
            pl.UnsafeCommand(
                "author",
                r"Generated by \emph{Sparkle} "
                f"(version: {__sparkle_version__})",
            ),
        ]
    )
    report.append(pl.Command("maketitle"))
    report.append(pl.Section("Introduction"))
    # TODO: Add a quick overview to the introduction of what the report covers
    # regarding Solvers, Instance Sets and Feature Extractors
    report.append(
        pl.UnsafeCommand(
            r"emph{Sparkle}~\cite{Hoos15} is a multi-agent problem-solving platform "
            r"based on Programming by Optimisation (PbO)~\cite{Hoos12}. It provides a "
            "number of effective algorithm optimisation techniques (such as automated "
            "algorithm configuration and portfolio-based algorithm selection) to "
            "accelerate existing solvers."
        )
    )

    for scenario_output, scenario in processed_configuration_scenarios:
        generate_configuration_section(report, scenario, scenario_output)

    for scenario_output, scenario in processed_selection_scenarios:
        generate_selection_section(report, scenario, scenario_output)

    for parallel_dataframe in parallel_portfolio_scenarios:
        generate_parallel_portfolio_section(report, parallel_dataframe)

    # Check whether the user wants to add the appendices
    settings = gv.settings(args)
    if settings.appendices:
        generate_appendix(report, performance_data, feature_data)

    # Add the bibliography
    report.append(pl.NewPage())  # Ensure it starts on a new page
    report.append(pl.Command("bibliographystyle", arguments=["plain"]))
    report.append(pl.Command("bibliography", arguments=[str(newbibpath)]))
    # Generate the report .tex and .pdf
    report.generate_pdf(target_path, clean=False, clean_tex=False, compiler="pdflatex")
    # TODO: This should be done by PyLatex. Generate the bib and regenerate the report
    # Reference for the (terrible) solution: https://tex.stackexchange.com/
    # questions/63852/question-mark-or-bold-citation-key-instead-of-citation-number
    import subprocess

    # Run BibTeX silently
    subprocess.run(
        ["bibtex", newbibpath.with_suffix("")],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    # Two more pdflatex passes so the citations and cross-references resolve
    report.generate_pdf(target_path, clean=False, clean_tex=False, compiler="pdflatex")
    report.generate_pdf(target_path, clean=False, clean_tex=False, compiler="pdflatex")
    print(f"Report generated at {target_path}.pdf")
    sys.exit(0)


if __name__ == "__main__":
    main(sys.argv[1:])