#!/usr/bin/env python3
"""Sparkle command to generate a report for an executed experiment."""

import sys
import math
import shutil
import argparse
import subprocess
from pathlib import Path
from typing import Union
import time
import json

import pandas as pd

from pylatex import NoEscape, NewPage
import pylatex as pl

from sparkle import __version__ as __sparkle_version__

from sparkle.CLI.help import global_variables as gv
from sparkle.CLI.help import resolve_object_name
from sparkle.CLI.help import logging as sl
from sparkle.CLI.help import argparse_custom as ac

from sparkle.solver import Solver
from sparkle.instance import Instance_Set
from sparkle.selector import Extractor
from sparkle.structures import PerformanceDataFrame, FeatureDataFrame
from sparkle.configurator.configurator import ConfigurationScenario
from sparkle.selector.selector import SelectionScenario
from sparkle.types import SolverStatus
from sparkle.platform import Settings

from sparkle.platform import latex
from sparkle.platform.output.configuration_output import ConfigurationOutput
from sparkle.platform.output.selection_output import SelectionOutput


MAX_DEC = 4  # Maximum number of decimals used for each reported value
MAX_COLS_PER_TABLE = 2  # Number of value columns per table, on top of the key columns
WIDE_TABLE_THRESHOLD = 4  # Column count above which a table is rotated to landscape
NUM_KEYS_PDF = 3  # Number of key columns for the PerformanceDataFrame
NUM_KEYS_FDF = 3  # Number of key columns for the FeatureDataFrame
MAX_CELL_LEN = 17  # Maximum characters per table cell before wrapping


def parser_function() -> argparse.ArgumentParser:
    """Define the command line arguments."""
    parser = argparse.ArgumentParser(
        description="Generate a report for all known selection, configuration and "
        "parallel portfolio scenarios.",
        epilog="If you wish to filter for specific solvers, instance sets, etc., "
        "have a look at the command line arguments.",
    )
    # Add argument for filtering solvers
    parser.add_argument(
        *ac.SolversReportArgument.names, **ac.SolversReportArgument.kwargs
    )
    # Add argument for filtering instance sets
    parser.add_argument(
        *ac.InstanceSetsReportArgument.names, **ac.InstanceSetsReportArgument.kwargs
    )

    # Add argument for filtering the appendix
    parser.add_argument(
        *Settings.OPTION_appendices.args, **Settings.OPTION_appendices.kwargs
    )

    # TODO: Add arguments for filtering configurators, selectors,
    # scenario ids and configuration ids?
    parser.add_argument(*ac.GenerateJSONArgument.names, **ac.GenerateJSONArgument.kwargs)
    return parser
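
# A minimal sketch of how this parser is typically used (the flag spellings below
# are hypothetical; the actual names are defined in sparkle.CLI.help.argparse_custom):
#
#     parser = parser_function()
#     args = parser.parse_args(["--solvers", "MySolver", "--instance-sets", "MySet"])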


def generate_configuration_section(
    report: pl.Document,
    scenario: ConfigurationScenario,
    scenario_output: ConfigurationOutput,
) -> None:
    """Generate a section for a configuration scenario."""
    report_dir = Path(report.default_filepath).parent
    time_stamp = time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))
    plot_dir = (
        report_dir
        / f"{scenario.configurator.__name__}_{scenario.name}_plots_{time_stamp}"
    )
    plot_dir.mkdir(exist_ok=True)

    # 1. Write section intro
    report.append(
        pl.Section(
            f"{scenario.configurator.__name__} Configuration: "
            f"{scenario.solver.name} on {scenario.instance_set.name}"
        )
    )
    report.append("In this scenario, ")
    report.append(
        pl.UnsafeCommand(
            f"textbf{{{scenario.configurator.__name__}}} "
            f"({scenario.configurator.full_name})~\\cite"
            f"{{{scenario.configurator.__name__}}} with version "
            f"{scenario.configurator.version} was used for configuration. "
        )
    )
    report.append(
        f"The Solver {scenario.solver} was optimised on training set "
        f"{scenario.instance_set}. The scenario was run {scenario.number_of_runs} "
        f"times independently with different seeds, yielding {scenario.number_of_runs} "
        f"configurations. The cutoff time for the solver was set to "
        f"{scenario.solver_cutoff_time} seconds. The optimised objective is "
        f"{scenario.sparkle_objectives[0]}. Each configuration was evaluated on the "
        "training set to determine the best configuration, i.e. the one with the best "
        f"{scenario.sparkle_objectives[0]} value on the training set."
    )

    # 2. Report all the configurator settings in table format
    report.append(pl.Subsection("Configurator Settings"))
    report.append(
        f"The following settings were used for {scenario.configurator.__name__}:\n"
    )
    tabular = pl.Tabular("l|r")
    tabular.add_row("Setting", "Value")
    tabular.add_hline()
    for setting, value in scenario.serialise().items():
        # Keep only the last path segment for paths, otherwise the table
        # gets too wide and obscures the other values
        stripped_value = str(value).strip().replace("\\", "/")
        segments = [segment for segment in stripped_value.split("/") if segment]
        if segments:  # Guard against empty values to avoid an IndexError
            tabular.add_row([setting, segments[-1]])
        else:
            tabular.add_row([setting, "None"])
    table_conf_settings = pl.Table(position="h")
    table_conf_settings.append(pl.UnsafeCommand("centering"))
    table_conf_settings.append(tabular)
    table_conf_settings.add_caption("Configurator Settings")
    report.append(table_conf_settings)

    # 3. Report details on the solver and instance sets used
    report.append(pl.Subsection("Solver & Instance Set(s) Details"))
    cs = scenario_output.solver.get_configuration_space()
    report.append(
        f"The solver {scenario_output.solver} was configured using "
        f"{len(cs.values())} configurable (hyper)parameters. "
        f"The configuration space has {len(cs.conditions)} conditions. "
    )
    report.append("The following instance sets were used for the scenario:")
    with report.create(pl.Itemize()) as instance_set_latex_list:
        for instance_set in [
            scenario_output.instance_set_train
        ] + scenario_output.test_instance_sets:
            instance_set_name = instance_set.name.replace("_", " ")  # LaTeX fix
            instance_set_latex_list.add_item(
                pl.UnsafeCommand(
                    f"textbf{{{instance_set_name}}} ({instance_set.size} instances)"
                )
            )

    # Helper to generate a results summary of default vs best on an instance set
    def instance_set_summary(instance_set_name: str) -> None:
        """Generate a results summary of default vs best on an instance set."""
        instance_set_results = scenario_output.instance_set_results[instance_set_name]
        report.append(
            f"The {scenario.sparkle_objectives[0]} value of the Default "
            f"Configuration on {instance_set_name} was "
        )
        report.append(
            pl.UnsafeCommand(
                f"textbf{{{round(instance_set_results.default_performance, MAX_DEC)}}}.\n"
            )
        )
        report.append(
            f"The {scenario.sparkle_objectives[0]} value of the Best "
            f"Configuration on {instance_set_name} was "
        )
        report.append(
            pl.UnsafeCommand(
                f"textbf{{{round(instance_set_results.best_performance, MAX_DEC)}}}.\n"
            )
        )
        report.append("In ")
        report.append(latex.AutoRef(f"fig:bestvsdefault{instance_set_name}{time_stamp}"))
        report.append(pl.utils.bold(" "))  # Trick to force a white space
        report.append("the results are plotted per instance.")

        # Create a graph comparing the best configuration against the default
        # on the instance set
        df = pd.DataFrame(
            [
                instance_set_results.default_instance_performance,
                instance_set_results.best_instance_performance,
            ],
            index=["Default Configuration", "Best Configuration"],
            dtype=float,
        ).T
        plot = latex.comparison_plot(df, None)
        plot_path = (
            plot_dir / f"{scenario_output.best_configuration_key}_vs_"
            f"Default_{instance_set_name}.pdf"
        )
        plot.write_image(plot_path, width=500, height=500)
        with report.create(pl.Figure(position="h")) as figure:
            figure.add_image(
                str(plot_path.relative_to(report_dir)),
                width=pl.utils.NoEscape(r"0.6\textwidth"),
            )
            figure.add_caption(
                f"Best vs Default Performance on {instance_set_name} "
                f"({scenario.sparkle_objectives[0]})"
            )
            figure.append(
                pl.UnsafeCommand(
                    r"label{"
                    f"fig:bestvsdefault{instance_set_name}{time_stamp}"
                    r"}"
                )
            )
        if scenario.sparkle_objectives[0].time:  # Write status table
            report.append("The following Solver statuses were found per instance:")
            tabular = pl.Tabular("l|c|c|c")
            tabular.add_row("Status", "Default", "Best", "Overlap")
            tabular.add_hline()
            # Count the statuses
            for status in SolverStatus:
                default_count, best_count, overlap_count = 0, 0, 0
                for instance in instance_set_results.instance_status_default.keys():
                    instance = str(instance)
                    default_hit = (
                        instance_set_results.instance_status_default[instance] == status
                    )
                    best_hit = (
                        instance_set_results.instance_status_best[instance] == status
                    )
                    default_count += default_hit
                    best_count += best_hit
                    overlap_count += default_hit and best_hit
                if default_count or best_count:
                    tabular.add_row(status, default_count, best_count, overlap_count)
            table_status_values = pl.Table(position="h")
            table_status_values.append(pl.UnsafeCommand("centering"))
            table_status_values.append(tabular)
            table_status_values.add_caption(
                "Status count for the best and default configuration."
            )
            report.append(table_status_values)

    # 4. Compare the best configuration against the default on the training set
    report.append(
        pl.Subsection(
            f"Comparison of Default and Best Configuration on Training Set "
            f"{scenario_output.instance_set_train.name}"
        )
    )
    instance_set_summary(scenario_output.instance_set_train.name)

    # 5. Report the actual configuration values
    report.append(pl.Subsubsection("Best Configuration Values"))
    if (
        scenario_output.best_configuration_key
        == PerformanceDataFrame.default_configuration
    ):
        report.append(
            "The configurator failed to find a better configuration than the "
            "default configuration on the training set in this scenario."
        )
    else:
        report.append(
            "The following parameter values "
            "were found to be the best on the training set:\n"
        )
        tabular = pl.Tabular("l|r")
        tabular.add_row("Parameter", "Value")
        tabular.add_hline()
        for parameter, value in scenario_output.best_configuration.items():
            tabular.add_row([parameter, str(value)])
        table_best_values = pl.Table(position="h")
        table_best_values.append(pl.UnsafeCommand("centering"))
        table_best_values.append(tabular)
        table_best_values.add_caption("Best found configuration values")
        report.append(table_best_values)

    # 6. Compare the best configuration against the default on the test sets
    for test_set in scenario_output.test_instance_sets:
        report.append(
            pl.Subsection(
                f"Comparison of Default and Best Configuration on Test Set "
                f"{test_set.name}"
            )
        )
        instance_set_summary(test_set.name)

    # 7. Report the parameter ablation scenario if present
    if scenario.ablation_scenario:
        report.append(pl.Subsection("Parameter importance via Ablation"))
        report.append("Ablation analysis ")
        report.append(pl.UnsafeCommand(r"cite{FawcettHoos16} "))
        # Fall back to the training set if no test set was given
        test_set = (
            scenario.ablation_scenario.test_set
            or scenario.ablation_scenario.train_set
        )
        report.append(
            f"is performed from the default configuration of {scenario.solver} to the "
            f"best found configuration ({scenario_output.best_configuration_key}) "
            "to see which parameter changes between them contribute most to the improved"
            " performance. The ablation path uses the training set "
            f"{scenario.ablation_scenario.train_set.name} and validation is performed "
            f"on the test set {test_set.name}. The set of parameters that differ between "
            "the two configurations forms the ablation path. Starting from the default "
            "configuration, the path is computed by performing a sequence of rounds. In"
            " a round, each available parameter is flipped in the configuration and "
            "validated on its performance. The flipped parameter with the best "
            "performance in that round is added to the configuration, and the next round"
            " starts with the remaining parameters. This repeats until all parameters "
            "have been flipped, at which point the configuration equals the best found "
            "configuration. The analysis resulted in the ablation presented in "
        )
        report.append(latex.AutoRef("tab:ablationtable"))
        report.append(".")

        # Add the ablation table
        tabular = pl.Tabular("r|l|r|r|r")
        data = scenario.ablation_scenario.read_ablation_table()
        for index, row in enumerate(data):
            tabular.add_row(*row)
            if index == 0:  # Horizontal line below the header row
                tabular.add_hline()
        table_ablation = pl.Table(position="h")
        table_ablation.append(pl.UnsafeCommand("centering"))
        table_ablation.append(tabular)
        table_ablation.add_caption("Ablation table")
        table_ablation.append(pl.UnsafeCommand(r"label{tab:ablationtable}"))
        report.append(table_ablation)


def generate_selection_section(
    report: pl.Document, scenario: SelectionScenario, scenario_output: SelectionOutput
) -> None:
    """Generate a section for a selection scenario."""
    report_dir = Path(report.default_filepath).parent
    time_stamp = time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))
    plot_dir = report_dir / f"{scenario.name.replace(' ', '_')}_plots_{time_stamp}"
    plot_dir.mkdir(exist_ok=True)
    report.append(
        pl.Section(
            f"Selection: {scenario.selector.model_class.__name__} on "
            f"{' '.join([s[0] for s in scenario_output.training_instance_sets])}"
        )
    )
    report.append(
        f"In this scenario, a {scenario.selector.model_class.__name__} "
        f"({scenario.selector.selector_class.__name__}) was trained on the "
        "performance and feature data using ASF-lib. The following solvers "
        f"were run with a cutoff time of {scenario.solver_cutoff} seconds:"
    )
    with report.create(pl.Itemize()) as solver_latex_list:
        # Iterate over items so the (LaTeX-friendly) display name does not
        # break the dictionary lookup for the configuration count
        for solver_name, configurations in scenario_output.solvers.items():
            display_name = solver_name.replace("_", " ")  # LaTeX fix
            solver_latex_list.add_item(
                pl.UnsafeCommand(
                    f"textbf{{{display_name}}} "
                    f"({len(configurations)} configurations)"
                )
            )
    # Report training instance sets
    report.append("The following training instance sets were used:")
    with report.create(pl.Itemize()) as instance_set_latex_list:
        for training_set_name, set_size in scenario_output.training_instance_sets:
            training_set_name = training_set_name.replace("_", " ")  # LaTeX fix
            instance_set_latex_list.add_item(
                pl.UnsafeCommand(f"textbf{{{training_set_name}}} ({set_size} instances)")
            )
    # Report feature extractors
    report.append(
        "The following feature extractors were used with an extractor cutoff "
        f"time of {scenario.extractor_cutoff} seconds:"
    )
    with report.create(pl.Itemize()) as feature_extractor_latex_list:
        for feature_extractor_name in scenario.feature_extractors:
            extractor = resolve_object_name(
                feature_extractor_name,
                gv.file_storage_data_mapping[gv.extractor_nickname_list_path],
                gv.settings().DEFAULT_extractor_dir,
                class_name=Extractor,
            )
            feature_extractor_name = feature_extractor_name.replace("_", " ")  # LaTeX
            feature_extractor_latex_list.add_item(
                pl.UnsafeCommand(
                    f"textbf{{{feature_extractor_name}}} "
                    f"({extractor.output_dimension} features)"
                )
            )
    # Report training results
    report.append(pl.Subsection("Training Results"))
    # 1. Report VBS and selector performance, create ranking list of the solvers
    # TODO: Add a reference here to the training sets section?
    report.append(
        f"This section reports the {scenario.objective.name} results of the "
        "portfolio selector on the training instance set(s) listed above. "
    )
    report.append(
        f"The {scenario.objective.name} value for the Virtual Best Solver "
        "(VBS), i.e., the perfect portfolio selector, is "
    )
    report.append(pl.utils.bold(f"{round(scenario_output.vbs_performance, MAX_DEC)}"))
    report.append(", the actual portfolio selector performance is ")
    report.append(
        pl.utils.bold(f"{round(scenario_output.actual_performance, MAX_DEC)}.\n")
    )

    report.append(
        f"Below, the solvers are ranked based on {scenario.objective.name} performance:"
    )
    with report.create(pl.Enumerate()) as ranking_list:
        for solver_name, conf_id, value in scenario_output.solver_performance_ranking:
            value = round(value, MAX_DEC)
            solver_name = solver_name.replace("_", " ")  # LaTeX fix
            conf_id = conf_id.replace("_", " ")  # LaTeX fix
            ranking_list.add_item(
                pl.UnsafeCommand(f"textbf{{{solver_name}}} ({conf_id}): {value}")
            )

    # 2. Marginal contribution ranking list for the VBS
    report.append(pl.Subsubsection("Marginal Contribution Ranking List"))
    report.append(
        "The following list shows the marginal contribution ranking list for the VBS:"
    )
    with report.create(pl.Enumerate()) as ranking_list:
        for (
            solver_name,
            conf_id,
            contribution,
            performance,
        ) in scenario_output.marginal_contribution_perfect:
            contribution, performance = (
                round(contribution, MAX_DEC),
                round(performance, MAX_DEC),
            )
            solver_name = solver_name.replace("_", " ")  # LaTeX fix
            conf_id = conf_id.replace("_", " ")  # LaTeX fix
            ranking_list.add_item(
                pl.UnsafeCommand(
                    f"textbf{{{solver_name}}} ({conf_id}): {contribution} ({performance})"
                )
            )

    # 3. Marginal contribution ranking list for the actual selector
    report.append(
        "The following list shows the marginal contribution ranking list for "
        "the actual portfolio selector:"
    )
    with report.create(pl.Enumerate()) as ranking_list:
        for (
            solver_name,
            conf_id,
            contribution,
            performance,
        ) in scenario_output.marginal_contribution_actual:
            contribution, performance = (
                round(contribution, MAX_DEC),
                round(performance, MAX_DEC),
            )
            solver_name = solver_name.replace("_", " ")  # LaTeX fix
            conf_id = conf_id.replace("_", " ")  # LaTeX fix
            ranking_list.add_item(
                pl.UnsafeCommand(
                    f"textbf{{{solver_name}}} ({conf_id}): {contribution} ({performance})"
                )
            )

    # 4. Create scatter plot analysis
    report.append(pl.Subsubsection("Scatter Plot Analysis"))
    report.append(latex.AutoRef(f"fig:sbsvsselector{time_stamp}"))
    report.append(pl.utils.bold(" "))  # Trick to force a white space
    report.append(
        "shows the empirical comparison between the portfolio "
        "selector and the single best solver (SBS). "
    )
    report.append(latex.AutoRef("fig:vbsvsselector"))
    report.append(pl.utils.bold(" "))  # Trick to force a white space
    report.append(
        "shows the empirical comparison between the actual portfolio selector "
        "and the virtual best solver (VBS)."
    )
    # Create figure on the SBS versus the selector
    sbs_name, sbs_config, _ = scenario_output.solver_performance_ranking[0]
    # sbs_plot_name = f"{Path(sbs_name).name} ({sbs_config})"
    sbs_performance = scenario_output.sbs_performance
    selector_performance = scenario_output.actual_performance_data

    # Join the data together
    df = pd.DataFrame(
        [sbs_performance, selector_performance],
        index=[f"{Path(sbs_name).name} ({sbs_config})", "Selector"],
        dtype=float,
    ).T
    plot = latex.comparison_plot(df, "Single Best Solver vs Selector")
    plot_path = (
        plot_dir / f"{Path(sbs_name).name}_{sbs_config}_vs_"
        f"Selector_{scenario.selector.model_class.__name__}.pdf"
    )
    plot.write_image(plot_path, width=500, height=500)
    with report.create(pl.Figure()) as figure:
        figure.add_image(
            str(plot_path.relative_to(report_dir)),
            width=pl.utils.NoEscape(r"0.6\textwidth"),
        )
        figure.add_caption(
            "Empirical comparison between the Single Best Solver and the Selector"
        )
        figure.append(
            pl.UnsafeCommand(r"label{fig:sbsvsselector" + str(time_stamp) + r"}")
        )

    # Comparison between the actual portfolio selector in Sparkle and the VBS
    vbs_performance = scenario_output.vbs_performance_data.tolist()
    df = pd.DataFrame(
        [vbs_performance, selector_performance],
        index=["Virtual Best Solver", "Selector"],
        dtype=float,
    ).T
    plot = latex.comparison_plot(df, "Virtual Best Solver vs Selector")
    plot_path = (
        plot_dir
        / f"Virtual_Best_Solver_vs_Selector_{scenario.selector.model_class.__name__}.pdf"
    )
    plot.write_image(plot_path, width=500, height=500)
    with report.create(pl.Figure()) as figure:
        figure.add_image(
            str(plot_path.relative_to(report_dir)),
            width=pl.utils.NoEscape(r"0.6\textwidth"),
        )
        figure.add_caption(
            "Empirical comparison between the Virtual Best Solver and the Selector"
        )
        figure.append(pl.UnsafeCommand(r"label{fig:vbsvsselector}"))

    if scenario_output.test_sets:
        report.append(pl.Subsection("Test Results"))
        report.append("The following results are reported on the test set(s):")
        with report.create(pl.Itemize()) as latex_list:
            for test_set_name, test_set_size in scenario_output.test_sets:
                result = round(
                    scenario_output.test_set_performance[test_set_name], MAX_DEC
                )
                latex_list.add_item(
                    pl.UnsafeCommand(
                        f"textbf{{{test_set_name}}} ({test_set_size} instances): {result}"
                    )
                )


def generate_parallel_portfolio_section(
    report: pl.Document, scenario: PerformanceDataFrame
) -> None:
    """Generate a section for a parallel portfolio scenario."""
    report_dir = Path(report.default_filepath).parent
    portfolio_name = scenario.csv_filepath.parent.name
    time_stamp = time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))
    plot_dir = report_dir / f"{portfolio_name.replace(' ', '_')}_plots_{time_stamp}"
    plot_dir.mkdir(exist_ok=True)
    report.append(pl.Section(f"Parallel Portfolio {portfolio_name}"))
    report.append(
        "In this scenario, Sparkle runs the portfolio of Solvers on each instance in "
        "parallel with "
        f"{gv.settings().parallel_portfolio_num_seeds_per_solver} different "
        "seeds. The cutoff time for each solver run is set to "
        f"{gv.settings().solver_cutoff_time} seconds."
    )
    report.append(pl.Subsection("Solvers & Instance Sets"))
    report.append("The following Solvers were used in the portfolio:")
    # 1. Report on the Solvers and Instance Sets used for the portfolio
    with report.create(pl.Itemize()) as solver_latex_list:
        configs = scenario.configurations
        for solver in scenario.solvers:
            solver_name = solver.replace("_", " ")  # LaTeX fix
            solver_latex_list.add_item(
                pl.UnsafeCommand(
                    f"textbf{{{solver_name}}} ({len(configs[solver])} configurations)"
                )
            )
    report.append("The following Instance Sets were used in the portfolio:")
    instance_sets = set(Path(instance).parent.name for instance in scenario.instances)
    instance_set_count = [
        len([i for i in scenario.instances if Path(i).parent.name == s])
        for s in instance_sets
    ]
    with report.create(pl.Itemize()) as instance_set_latex_list:
        for set_name, set_size in zip(instance_sets, instance_set_count):
            set_name = set_name.replace("_", " ")  # LaTeX fix
            instance_set_latex_list.add_item(
                pl.UnsafeCommand(f"textbf{{{set_name}}} ({set_size} instances)")
            )
    # 2. List which solver was the best on how many instances
    report.append(pl.Subsection("Portfolio Performance"))
    objective = scenario.objectives[0]
    report.append(
        f"The objective for the portfolio is {objective}. The "
        "following performance of the solvers was found over the instances: "
    )
    best_solver_count = {solver: 0 for solver in scenario.solvers}
    for instance in scenario.instances:
        ranking = scenario.get_solver_ranking(objective=objective, instances=[instance])
        best_solver_count[ranking[0][0]] += 1

    with report.create(pl.Itemize()) as latex_list:
        for solver, count in best_solver_count.items():
            solver_name = solver.replace("_", " ")  # LaTeX fix
            latex_list.add_item(
                pl.UnsafeCommand(
                    f"textbf{{{solver_name}}} was the best solver on {count} instance(s)."
                )
            )
    # TODO: Report how many instances remained unsolved

    # 3. Create a table showing the performance of the portfolio and all solvers,
    # by showing the status counts and the number of times each solver was best
    solver_cancelled_count = {solver: 0 for solver in scenario.solvers}
    solver_timeout_count = {solver: 0 for solver in scenario.solvers}
    status_objective = [
        o for o in scenario.objective_names if o.lower().startswith("status")
    ][0]
    cancelled_status = [
        SolverStatus.UNKNOWN,
        SolverStatus.CRASHED,
        SolverStatus.WRONG,
        SolverStatus.ERROR,
        SolverStatus.KILLED,
    ]
    for solver in scenario.solvers:
        statuses = scenario.get_value(solver=solver, objective=status_objective)
        for status in statuses:
            status = SolverStatus(status)
            if status in cancelled_status:
                solver_cancelled_count[solver] += 1
            elif status == SolverStatus.TIMEOUT:
                solver_timeout_count[solver] += 1

    report.append(latex.AutoRef("tab:parallelportfoliotable"))
    report.append(pl.utils.bold(" "))  # Trick to force a white space
    report.append("shows the performance of the portfolio and its solvers.")
    tabular = pl.Tabular("r|rrrr")
    tabular.add_row(["Solver", objective, "# Timeouts", "# Cancelled", "# Best"])
    tabular.add_hline()
    solver_performance = {
        solver: round(performance, MAX_DEC)
        for solver, _, performance in scenario.get_solver_ranking(objective=objective)
    }
    for solver in scenario.solvers:
        tabular.add_row(
            solver,
            solver_performance[solver],
            solver_timeout_count[solver],
            solver_cancelled_count[solver],
            best_solver_count[solver],
        )
    tabular.add_hline()
    portfolio_performance = round(
        scenario.best_performance(objective=objective), MAX_DEC
    )
    tabular.add_row(
        portfolio_name,
        portfolio_performance,
        sum(solver_timeout_count.values()),
        sum(solver_cancelled_count.values()),
        sum(best_solver_count.values()),
    )
    table_portfolio = pl.Table(position="h")
    table_portfolio.append(pl.UnsafeCommand("centering"))
    table_portfolio.append(tabular)
    table_portfolio.add_caption("Parallel Portfolio Performance")
    table_portfolio.append(pl.UnsafeCommand(r"label{tab:parallelportfoliotable}"))
    report.append(table_portfolio)
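
    # The resulting table has roughly this shape (hypothetical values):
    #   Solver     PAR10    # Timeouts  # Cancelled  # Best
    #   ---------------------------------------------------
    #   SolverA    123.4    2           0            10
    #   SolverB    234.5    5           1            3
    #   ---------------------------------------------------
    #   Portfolio  98.7     7           1            13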

    # 4. Create a scatter plot comparing the portfolio and the single best solver
    sbs_name = scenario.get_solver_ranking(objective=objective)[0][0]
    sbs_instance_performance = scenario.get_value(
        solver=sbs_name, objective=objective.name
    )
    sbs_name = Path(sbs_name).name
    report.append(latex.AutoRef("fig:portfoliovssbs"))
    report.append(pl.utils.bold(" "))  # Trick to force a white space
    report.append(
        "shows the empirical comparison between the portfolio and the single "
        f"best solver (SBS) {sbs_name}."
    )
    portfolio_instance_performance = scenario.best_instance_performance(
        objective=objective.name
    ).tolist()

    df = pd.DataFrame(
        [sbs_instance_performance, portfolio_instance_performance],
        index=[f"SBS ({sbs_name}) Performance", "Portfolio Performance"],
        dtype=float,
    ).T
    plot = latex.comparison_plot(df, None)
    plot_path = plot_dir / f"sbs_{sbs_name}_vs_parallel_portfolio.pdf"
    plot.write_image(plot_path, width=500, height=500)
    with report.create(pl.Figure(position="h")) as figure:
        figure.add_image(
            str(plot_path.relative_to(report_dir)),
            width=pl.utils.NoEscape(r"0.6\textwidth"),
        )
        figure.add_caption(f"Portfolio vs SBS Performance ({objective})")
        figure.append(pl.UnsafeCommand(r"label{fig:portfoliovssbs}"))


def append_dataframe_longtable(
    report: pl.Document,
    df: pd.DataFrame,
    caption: str,
    label: str,
    max_cols: int = MAX_COLS_PER_TABLE,
    wide_threshold: int = WIDE_TABLE_THRESHOLD,
    num_keys: int = NUM_KEYS_PDF,
) -> None:
    """Appends a pandas DataFrame to a PyLaTeX document as one or more LaTeX longtables.

    Args:
        report: The PyLaTeX document to which the table(s) will be appended.
        df: The DataFrame to be rendered as LaTeX longtable(s).
        caption: The caption for the table(s).
        label: The LaTeX label for referencing the table(s).
        max_cols: Maximum number of value columns per table chunk.
            Defaults to MAX_COLS_PER_TABLE.
        wide_threshold: Number of columns above which the table is rotated
            to landscape. Defaults to WIDE_TABLE_THRESHOLD.
        num_keys: Number of key columns to include in each table chunk.
            Defaults to NUM_KEYS_PDF.

    Returns:
        None
    """

    def latex_escape_text(string: str) -> str:
        """Escape special LaTeX characters in a string."""
        # Escape backslashes first via a brace-free placeholder, so that the
        # brace replacements below do not mangle the \textbackslash{} macro
        string = string.replace("\\", "\x00")
        string = (
            string.replace("&", r"\&")
            .replace("%", r"\%")
            .replace("$", r"\$")
            .replace("#", r"\#")
            .replace("_", r"\_")
            .replace("{", r"\{")
            .replace("}", r"\}")
            .replace("~", r"\textasciitilde{}")
            .replace("^", r"\textasciicircum{}")
        )
        return string.replace("\x00", r"\textbackslash{}")
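
    # A minimal sketch of the intended behaviour (hypothetical inputs):
    #   >>> latex_escape_text("50%_done")
    #   '50\\%\\_done'
    #   >>> latex_escape_text("a\\b")
    #   'a\\textbackslash{}b'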

    def last_path_segment(text: str) -> str:
        """Keep only the last non-empty path-like segment.

        Handles both back- and forward slashes and ignores any
        leading or trailing slashes.
        """
        stripped_text = str(text).strip().replace("\\", "/")
        segments = [
            segment for segment in stripped_text.split("/") if segment
        ]  # Ignore empty segments
        return segments[-1] if segments else ""
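
    # For example (hypothetical inputs):
    #   >>> last_path_segment("C:\\Users\\sparkle\\solver.py")
    #   'solver.py'
    #   >>> last_path_segment("/a/b/c/")
    #   'c'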

    def wrap_fixed_shortstack(cell: str, width: int = MAX_CELL_LEN) -> str:
        """Wrap long text to a fixed width for LaTeX tables."""
        string_cell = last_path_segment(cell)
        if len(string_cell) <= width:
            return latex_escape_text(string_cell)
        chunks = [
            latex_escape_text(string_cell[index : index + width])
            for index in range(0, len(string_cell), width)
        ]
        # A left-aligned shortstack forces line breaks and grows the row height
        return r"\shortstack[l]{" + r"\\ ".join(chunks) + "}"

    def wrap_header_labels(
        df: pd.DataFrame, width_per_cell: int = MAX_CELL_LEN
    ) -> pd.DataFrame:
        """Wrap long header labels to a fixed width for LaTeX tables."""
        df_copy = df.copy()
        if isinstance(df_copy.columns, pd.MultiIndex):
            new_cols = []
            for column in df_copy.columns:
                new_cols.append(
                    tuple(
                        wrap_fixed_shortstack(last_path_segment(index), width_per_cell)
                        if isinstance(index, str)
                        else index
                        for index in column
                    )
                )
            names = [
                (
                    wrap_fixed_shortstack(last_path_segment(name), width_per_cell)
                    if isinstance(name, str)
                    else name
                )
                for name in (df_copy.columns.names or [])
            ]
            df_copy.columns = pd.MultiIndex.from_tuples(new_cols, names=names)
        else:
            df_copy.columns = [
                wrap_fixed_shortstack(last_path_segment(column), width_per_cell)
                if isinstance(column, str)
                else column
                for column in df_copy.columns
            ]
        return df_copy

    def format_cell(cell: Union[int, float, str]) -> str:
        """Format a cell for printing in a LaTeX table."""
        try:
            float_cell = float(cell)
        except (TypeError, ValueError):
            return wrap_fixed_shortstack(last_path_segment(str(cell)), MAX_CELL_LEN)

        if not math.isfinite(float_cell):  # NaN and +/- infinity
            return "NaN"

        if float_cell.is_integer():
            return str(int(float_cell))
        # Round to MAX_DEC decimals, then strip trailing zeros
        return f"{round(float_cell, MAX_DEC):.{MAX_DEC}f}".rstrip("0").rstrip(".")
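
    # For example (hypothetical values):
    #   >>> format_cell(3.0)
    #   '3'
    #   >>> format_cell(0.123456)
    #   '0.1235'
    #   >>> format_cell("Solvers/MySolver")
    #   'MySolver'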

    df_copy = df.copy()

    # In order to be able to show the key columns, we need to reset the index
    if not isinstance(df_copy.index, pd.RangeIndex) and df_copy.index.name in (
        None,
        "index",
        "",
    ):
        df_copy = df_copy.reset_index()

    # Remove the Seed column from the performance dataframe since it is not
    # very informative and clutters the table
    if isinstance(df, PerformanceDataFrame):
        mask = df_copy.columns.get_level_values("Meta") == "Seed"
        df_copy = df_copy.loc[:, ~mask]

    # For the performance dataframe, we want to show the values of the objectives
    # with their corresponding instance and run. Since objective, instance and run
    # are indexes in the performance dataframe, we reset the index to get them as
    # columns. We name them key columns, since they are the key to identify the
    # value of the objective for a given instance and run.
    # (Respectively FeatureGroup, FeatureName, Extractor in the feature dataframe)
    keys = df_copy.iloc[:, :num_keys]  # Key columns

    # Split the dataframe into chunks of max_cols value columns per page
    number_column_chunks = max(
        math.ceil((df_copy.shape[1] - num_keys) / max_cols), 1
    )
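    # For example, with num_keys=3 and max_cols=2, a dataframe with 7 columns
    # has 4 value columns and is split into ceil(4 / 2) = 2 chunks, each
    # repeating the 3 key columns next to (at most) 2 value columns.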
    for i in range(number_column_chunks):
        start_col = i * max_cols
        end_col = (i + 1) * max_cols

        # Select the value columns for this chunk
        values = df_copy.iloc[
            :,
            start_col + num_keys : end_col + num_keys,
        ]

        # Concatenate the key and value columns
        full_part = pd.concat([keys, values], axis=1)

        # If there are no value columns left, we are done
        # (checked before appending a NewPage to avoid a stray blank page)
        if full_part.shape[1] <= num_keys:
            break
        report.append(NewPage())

        full_part_wrapped = wrap_header_labels(full_part, MAX_CELL_LEN)

        # Tell pandas how to print numbers
        formatters = {col: format_cell for col in full_part_wrapped.columns}

        tex = full_part_wrapped.to_latex(
            longtable=True,
            index=False,
            escape=False,  # We want to split the long words, not escape them
            caption=caption + (f" (part {i + 1})" if number_column_chunks > 1 else ""),
            label=(label + f"-p{i + 1}") if number_column_chunks > 1 else label,
            float_format=None,
            multicolumn=True,
            multicolumn_format="c",
            multirow=False,
            column_format="c" * full_part_wrapped.shape[1],
            formatters=formatters,
        )

        # Centre the whole table horizontally
        centred_tex = "\\begin{center}\n" + tex + "\\end{center}\n"

        # Rotate if the table is still too wide
        if full_part_wrapped.shape[1] > wide_threshold:
            report.append(NoEscape(r"\begin{landscape}"))
            report.append(NoEscape(centred_tex))
            report.append(NoEscape(r"\end{landscape}"))
        else:
            report.append(NoEscape(centred_tex))


def generate_appendix(
    report: pl.Document,
    performance_data: PerformanceDataFrame,
    feature_data: FeatureDataFrame,
) -> None:
    """Add an appendix with the raw performance and feature data to the report.

    Args:
        report: The LaTeX document object to which the appendix will be added.
        performance_data: The performance data to be included in the appendix.
        feature_data: The feature data to be included in the appendix.

    Returns:
        None
    """
    report.packages.append(pl.Package("pdflscape"))  # Landscape pages
    report.packages.append(pl.Package("longtable"))  # Long tables
    report.packages.append(pl.Package("booktabs"))  # Better table formatting
    report.append(pl.NewPage())
    report.append(pl.NoEscape(r"\clearpage"))
    report.append(pl.UnsafeCommand("appendix"))
    report.append(pl.Section("Performance DataFrame"))

    append_dataframe_longtable(
        report,
        performance_data,
        caption="Performance DataFrame",
        label="tab:perf_data",
        max_cols=MAX_COLS_PER_TABLE,
        wide_threshold=WIDE_TABLE_THRESHOLD,
        num_keys=NUM_KEYS_PDF,
    )

    report.append(pl.Section("Feature DataFrame"))
    append_dataframe_longtable(
        report,
        feature_data,
        caption="Feature DataFrame",
        label="tab:feature_data",
        max_cols=MAX_COLS_PER_TABLE,
        wide_threshold=WIDE_TABLE_THRESHOLD,
        num_keys=NUM_KEYS_FDF,
    )


def main(argv: list[str]) -> None:
    """Generate a report for executed experiments in the platform."""
    # Log the command call
    sl.log_command(sys.argv, gv.settings().random_state)

    # Define the command line arguments
    parser = parser_function()

    # Process the command line arguments
    args = parser.parse_args(argv)

    performance_data = PerformanceDataFrame(gv.settings().DEFAULT_performance_data_path)
    feature_data = FeatureDataFrame(gv.settings().DEFAULT_feature_data_path)

    # Fetch all known scenarios
    configuration_scenarios = gv.configuration_scenarios(refresh=True)
    selection_scenarios = gv.selection_scenarios(refresh=True)
    parallel_portfolio_scenarios = gv.parallel_portfolio_scenarios()

    # Filter scenarios based on the arguments
    if args.solvers:
        solvers = [
            resolve_object_name(
                solver,
                gv.solver_nickname_mapping,
                gv.settings().DEFAULT_solver_dir,
                Solver,
            )
            for solver in args.solvers
        ]
        configuration_scenarios = [
            scenario
            for scenario in configuration_scenarios
            if scenario.solver.directory in [solver.directory for solver in solvers]
        ]
        selection_scenarios = [
            scenario
            for scenario in selection_scenarios
            if set(scenario.solvers).intersection(
                [str(solver.directory) for solver in solvers]
            )
        ]
        parallel_portfolio_scenarios = [
            scenario
            for scenario in parallel_portfolio_scenarios
            if set(scenario.solvers).intersection(
                [str(solver.directory) for solver in solvers]
            )
        ]
    if args.instance_sets:
        instance_sets = [
            resolve_object_name(
                instance_set,
                gv.instance_set_nickname_mapping,
                gv.settings().DEFAULT_instance_dir,
                Instance_Set,
            )
            for instance_set in args.instance_sets
        ]
        configuration_scenarios = [
            scenario
            for scenario in configuration_scenarios
            if scenario.instance_set.directory
            in [instance_set.directory for instance_set in instance_sets]
        ]
        selection_scenarios = [
            scenario
            for scenario in selection_scenarios
            if set(scenario.instance_sets).intersection(
                [str(instance_set.name) for instance_set in instance_sets]
            )
        ]
        parallel_portfolio_scenarios = [
            scenario
            for scenario in parallel_portfolio_scenarios
            if set(scenario.instance_sets).intersection(
                [str(instance_set.name) for instance_set in instance_sets]
            )
        ]
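
    # For example, invoking the command with both filters (hypothetical flag
    # spellings and object names):
    #   generate_report --solvers PbO-CCSAT --instance-sets PTN
    # keeps only the scenarios involving both the given solver and instance set.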

    processed_configuration_scenarios = []
    processed_selection_scenarios = []
    possible_test_sets = [
        Instance_Set(possible_test_set)
        for possible_test_set in gv.settings().DEFAULT_instance_dir.iterdir()
    ]
    for configuration_scenario in configuration_scenarios:
        processed_configuration_scenarios.append(
            (
                ConfigurationOutput(
                    configuration_scenario, performance_data, possible_test_sets
                ),
                configuration_scenario,
            )
        )
    for selection_scenario in selection_scenarios:
        processed_selection_scenarios.append(
            (SelectionOutput(selection_scenario), selection_scenario)
        )
    if (
        not configuration_scenarios
        and not selection_scenarios
        and not parallel_portfolio_scenarios
    ):
        print("No scenarios found. Exiting.")
        sys.exit(-1)
    raw_output = gv.settings().DEFAULT_output_analysis / "JSON"
    if raw_output.exists():  # Clean up any previous output
        shutil.rmtree(raw_output)
    raw_output.mkdir()

    # Write the JSON output
    output_json = {}
    for output, configuration_scenario in processed_configuration_scenarios:
        output_json[configuration_scenario.name] = output.serialise()
    for output, selection_scenario in processed_selection_scenarios:
        output_json[selection_scenario.name] = output.serialise()
    # TODO: We do not have an output object for parallel portfolios

    raw_output_json = raw_output / "output.json"
    with raw_output_json.open("w") as f:
        json.dump(output_json, f, indent=4)

    print(f"Machine readable output written to: {raw_output_json}")

    if args.only_json:  # Done
        sys.exit(0)

    # TODO: Group scenarios based on:
    # - Configuration / Selection / Parallel Portfolio
    # - Training Instance Set / Testing Instance Set
    # - Configurators can be merged as long as we can match their budgets clearly
    report_directory = gv.settings().DEFAULT_output_analysis / "report"
    if report_directory.exists():  # Clean it
        shutil.rmtree(report_directory)
    report_directory.mkdir()
    target_path = report_directory / "report"
    report = pl.document.Document(
        default_filepath=str(target_path), document_options=["british"]
    )
    bibpath = gv.settings().bibliography_path
    newbibpath = report_directory / "report.bib"
    shutil.copy(bibpath, newbibpath)
    # BUGFIX for an unknown package load in PyLaTeX
    lastpage_package = pl.package.Package("lastpage")
    if lastpage_package in report.packages:
        report.packages.remove(lastpage_package)
    report.packages.append(
        pl.package.Package(
            "geometry",
            options=[
                "verbose",
                "tmargin=3.5cm",
                "bmargin=3.5cm",
                "lmargin=3cm",
                "rmargin=3cm",
            ],
        )
    )
    # Unsafe command for \emph{Sparkle}
    report.preamble.extend(
        [
            pl.UnsafeCommand("title", r"\emph{Sparkle} Algorithm Portfolio report"),
            pl.UnsafeCommand(
                "author",
                r"Generated by \emph{Sparkle} "
                f"(version: {__sparkle_version__})",
            ),
        ]
    )
    report.append(pl.Command("maketitle"))
    report.append(pl.Section("Introduction"))
    # TODO: Add a quick overview to the introduction on what is considered in the
    # report regarding Solvers, Instance Sets and Feature Extractors
    solver_tool = (
        "RunSolver" if gv.settings().DEFAULT_runsolver_exec.exists() else "PyRunSolver"
    )
    report.append(
        pl.UnsafeCommand(
            r"emph{Sparkle}~\cite{Hoos15} is a multi-agent problem-solving platform "
            r"based on Programming by Optimisation (PbO)~\cite{Hoos12}. It provides a "
            "number of effective algorithm optimisation techniques (such as automated "
            "algorithm configuration, portfolio-based algorithm selection, etc.) to "
            "accelerate existing solvers. All computation and memory measurements "
            f"are done by {solver_tool}."
        )
    )

    for scenario_output, scenario in processed_configuration_scenarios:
        generate_configuration_section(report, scenario, scenario_output)

    for scenario_output, scenario in processed_selection_scenarios:
        generate_selection_section(report, scenario, scenario_output)

    for parallel_dataframe in parallel_portfolio_scenarios:
        generate_parallel_portfolio_section(report, parallel_dataframe)

    # Check whether the user wants to add the appendix
    settings = gv.settings(args)
    if settings.appendices:
        generate_appendix(report, performance_data, feature_data)

    # Add the bibliography
    report.append(pl.NewPage())  # Ensure it starts on a new page
    report.append(pl.Command("bibliographystyle", arguments=["plain"]))
    report.append(pl.Command("bibliography", arguments=[str(newbibpath)]))
    # Generate the report .tex and .pdf
    report.generate_pdf(target_path, clean=False, clean_tex=False, compiler="pdflatex")
    # TODO: This should be done by PyLaTeX. Generate the bib and regenerate the report.
    # Reference for the (terrible) solution: https://tex.stackexchange.com/
    # questions/63852/question-mark-or-bold-citation-key-instead-of-citation-number

    # Run BibTeX silently
    subprocess.run(
        ["bibtex", newbibpath.with_suffix("")],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    # Run pdflatex twice more so the citations resolve to their final numbers
    report.generate_pdf(target_path, clean=False, clean_tex=False, compiler="pdflatex")
    report.generate_pdf(target_path, clean=False, clean_tex=False, compiler="pdflatex")
    print(f"Report generated at {target_path}.pdf")
    sys.exit(0)


if __name__ == "__main__":
    main(sys.argv[1:])