Coverage for sparkle/platform/generate_report_for_configuration.py: 84%

146 statements  

coverage.py v7.6.10, created at 2025-01-07 15:22 +0000

#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
"""Helper functions for algorithm configuration report generation."""
from __future__ import annotations

import sys
from pathlib import Path

from scipy.stats import linregress

from sparkle.platform import latex as stex
from sparkle.configurator.ablation import AblationScenario
from sparkle.configurator.configurator import ConfigurationScenario
from sparkle.instance import InstanceSet
from sparkle.structures import PerformanceDataFrame
from sparkle.types import SolverStatus
from sparkle import about

from sparkle.platform.output.configuration_output import ConfigurationOutput



def get_features_bool(configurator_scenario: ConfigurationScenario,
                      solver_name: str, train_set: InstanceSet) -> str:
    """Return a bool string for LaTeX indicating whether features were used.

    True if a feature file is given in the scenario file, false otherwise.

    Args:
        configurator_scenario: Configuration scenario to read the scenario file from
        solver_name: Name of the solver
        train_set: Instance set used for training

    Returns:
        A string describing whether features are used
    """
    scenario_file = configurator_scenario.directory \
        / f"{solver_name}_{train_set.name}_scenario.txt"

    for line in scenario_file.open("r").readlines():
        if line.split(" ")[0] == "feature_file":
            return "\\featurestrue"
    return "\\featuresfalse"



def get_ablation_bool(scenario: AblationScenario) -> str:
    """Return the ablation bool as LaTeX string.

    Args:
        scenario: The ablation scenario to check

    Returns:
        A string describing whether ablation was run or not
    """
    if scenario.check_for_ablation():
        return "\\ablationtrue"
    return "\\ablationfalse"



def plot_configured_vs_default(
        config_output: ConfigurationOutput,
        config_scenario: ConfigurationScenario,
        target_directory: Path,
        test_mode: bool = False) -> str:
    """Create a figure comparing the configured and default solver.

    Base function to create a comparison plot of the default and configured
    performance on a given instance set.

    Args:
        config_output: Object representation of the ConfigurationOutput
        config_scenario: ConfigurationScenario
        target_directory: Directory for the configuration reports
        test_mode: Whether to plot the test instance set instead of the training set

    Returns:
        A string containing the LaTeX command to include the figure
    """
    instance_set_name = (config_output.instance_set_train.name
                         if not test_mode else config_output.instance_set_test.name)
    figure_filename =\
        f"data_{config_output.solver.name}_configured_vs_default_on_{instance_set_name}"
    if not test_mode:
        points = list(zip(config_output.default_performance_per_instance_train,
                          config_output.best_conf_performance_per_instance_train))
    else:
        points = list(zip(config_output.default_performance_per_instance_train,
                          config_output.best_conf_performance_per_instance_test))
    objective_name = config_scenario.sparkle_objective.name
    plot_params = {"xlabel": f"Default parameters [{objective_name}]",
                   "ylabel": f"Configured parameters [{objective_name}]",
                   "scale": "linear",
                   "limit_min": 1.5,
                   "limit_max": 1.5,
                   "replace_zeros": False,
                   "output_dir": target_directory
                   }
    # Check if the scale of the axis can be considered linear
    x_points = [p[0] for p in points]
    y_points = [p[1] for p in points]
    if len(set(x_points)) != 1 and len(set(y_points)) != 1:
        linearity_x = linregress(x_points, range(len(points))).rvalue > 0.5
        linearity_y = linregress(y_points, range(len(points))).rvalue > 0.5
        if not linearity_x or not linearity_y:
            plot_params["scale"] = "log"
            plot_params["replace_zeros"] = True

    stex.generate_comparison_plot(points,
                                  figure_filename,
                                  **plot_params)

    return f"\\includegraphics[width=0.6\\textwidth]{{{figure_filename}}}"
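

# Editor's illustration (not part of the original module): a minimal, self-contained
# sketch of the scale heuristic used above, with made-up toy values. It only needs
# scipy and mirrors the rvalue > 0.5 check: when either coordinate list does not grow
# roughly linearly with its index, the comparison plot falls back to a log scale.
def _sketch_prefers_log_scale(x_points: list[float], y_points: list[float]) -> bool:
    """Return True when the heuristic above would switch the plot to a log scale."""
    from scipy.stats import linregress
    if len(set(x_points)) == 1 or len(set(y_points)) == 1:
        return False  # degenerate axes keep the linear default, as above
    linear_x = linregress(x_points, range(len(x_points))).rvalue > 0.5
    linear_y = linregress(y_points, range(len(y_points))).rvalue > 0.5
    return not linear_x or not linear_y
# Example: _sketch_prefers_log_scale([0.1, 0.2, 300.0, 0.3], [0.1, 0.4, 0.2, 290.0])
# should return True for heavy-tailed runtimes like these (assumed toy values).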



def get_timeouts_instanceset(config_output: ConfigurationOutput,
                             instance_set: InstanceSet) -> tuple[int, int, int]:
    """Return the number of timeouts by configured, default and both on the instance set.

    Args:
        config_output: The configuration output holding the performance data
        instance_set: The instance set to count the timeouts on

    Returns:
        A tuple containing the number of timeouts for the different configurations
    """
    solver_key = str(config_output.solver.directory)
    instance_keys = [str(instance) for instance in instance_set.instance_paths]
    # Determine status objective name
    objective = [o for o in config_output.performance_data.objectives
                 if o.stem.lower() == "status"][0]
    _, configured_status = config_output.performance_data.configuration_performance(
        solver_key,
        configuration=config_output.best_configuration,
        objective=objective,
        instances=instance_keys,
        per_instance=True)
    _, default_status = config_output.performance_data.configuration_performance(
        solver_key,
        configuration=PerformanceDataFrame.missing_value,
        objective=objective,
        instances=instance_keys,
        per_instance=True)

    default_timeouts, configured_timeouts, shared = 0, 0, 0
    for conf_status, def_status in zip(configured_status, default_status):
        conf_status, def_status =\
            SolverStatus(conf_status), SolverStatus(def_status)
        if conf_status == SolverStatus.TIMEOUT:
            configured_timeouts += 1
        if def_status == SolverStatus.TIMEOUT:
            default_timeouts += 1
        if (conf_status == SolverStatus.TIMEOUT
                and def_status == SolverStatus.TIMEOUT):
            shared += 1
    return configured_timeouts, default_timeouts, shared



def get_ablation_table(scenario: AblationScenario) -> str:
    """Generate a LaTeX table of the ablation path.

    This is the result of the ablation analysis to determine the parameter importance.

    Args:
        scenario: The ablation scenario to read the table from

    Returns:
        A string containing the LaTeX table code of the ablation path
    """
    results = scenario.read_ablation_table()
    table_string = r"\begin{tabular}{rp{0.25\linewidth}rrr}"
    # "Round", "Flipped parameter", "Source value", "Target value", "Validation result"
    for i, line in enumerate(results):
        # If this fails something has changed in the representation of ablation tables
        if len(line) != 5:
            print("""ERROR: something has changed with the representation
                  of ablation tables""")
            sys.exit(-1)
        if i == 0:
            line = [f"\\textbf{{{word}}}" for word in line]

        # Put multiple variable changes in one round on a separate line
        if (len(line[1].split(",")) > 1
                and len(line[1].split(",")) == len(line[2].split(","))
                and len(line[1].split(",")) == len(line[3].split(","))):
            params = line[1].split(",")
            default_values = line[2].split(",")
            flipped_values = line[3].split(",")

            sublines = len(params)
            for subline in range(sublines):
                round_label = "" if subline != 0 else line[0]
                result = "" if subline + 1 != sublines else line[-1]
                printline = [round_label, params[subline], default_values[subline],
                             flipped_values[subline], result]
                table_string += " & ".join(printline) + " \\\\ "
        else:
            table_string += " & ".join(line) + " \\\\ "
        if i == 0:
            table_string += "\\hline "
    table_string += "\\end{tabular}"

    return table_string
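

# Editor's illustration (hypothetical rows, not from a real ablation run): the shape
# get_ablation_table expects from read_ablation_table() is assumed to be five fields
# per row, header first, with comma-separated entries when one round flips several
# parameters at once (those rows are split over multiple table lines above).
def _sketch_toy_ablation_rows() -> list[list[str]]:
    """Return a toy ablation table in the assumed five-column row format."""
    return [
        ["Round", "Flipped parameter", "Source value", "Target value",
         "Validation result"],
        ["1", "alpha", "0.5", "0.9", "12.3"],
        ["2", "beta,gamma", "10,off", "25,on", "11.7"],  # two flips in one round
    ]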



def configuration_report_variables(config_scenario: ConfigurationScenario,
                                   config_output: ConfigurationOutput,
                                   target_dir: Path,
                                   bib_path: Path,
                                   extractor_dir: Path,
                                   extractor_cutoff: int,
                                   ablation: AblationScenario = None) -> dict:
    """Return a dict matching LaTeX variables and their values.

    Args:
        config_scenario: Object representation of the ConfigurationScenario
        config_output: Object representation of the ConfigurationOutput
        target_dir: Target directory
        bib_path: Path to the LaTeX bib file.
        extractor_dir: General platform extractor directory
        extractor_cutoff: Extractor cut off time.
        ablation: The ablation scenario, if ablation was run. Defaults to None.

    Returns:
        A dictionary containing the variables and values
    """
    has_test = config_output.instance_set_test is not None
    full_dict = {"bibliographypath": bib_path.absolute(),
                 "sparkleVersion": about.version}

    full_dict.update(get_dict_variable_to_value_common(config_scenario,
                                                       config_output,
                                                       ablation,
                                                       target_dir))

    if has_test:
        test_dict = get_dict_variable_to_value_test(config_output,
                                                    config_scenario,
                                                    target_dir,
                                                    ablation)
        full_dict.update(test_dict)
    full_dict["testBool"] = f"\\test{str(has_test).lower()}"

    if ablation is None:
        full_dict["ablationBool"] = "\\ablationfalse"

    if full_dict["featuresBool"] == "\\featurestrue":
        full_dict["numFeatureExtractors"] =\
            len(list(extractor_dir.iterdir()))
        full_dict["featureExtractorList"] =\
            stex.list_to_latex([(p.name, "") for p in extractor_dir.iterdir()])
        full_dict["featureComputationCutoffTime"] = extractor_cutoff

    return full_dict



def get_dict_variable_to_value_common(config_scenario: ConfigurationScenario,
                                      config_output: ConfigurationOutput,
                                      ablation: AblationScenario,
                                      target_directory: Path) -> dict:
    """Return a dict matching LaTeX variables and values used for all config. reports.

    Args:
        config_scenario: Configuration scenario
        config_output: Configuration output
        ablation: Ablation scenario, if run
        target_directory: Path to directory with configuration reports

    Returns:
        A dictionary containing the variables and values
    """
    objective = config_scenario.sparkle_objective

    latex_dict = {"objectiveName": objective.name,
                  "configuratorName": config_output.configurator.name,
                  "configuratorVersion": config_output.configurator.version,
                  "configuratorFullName": config_output.configurator.full_name}

    if objective.time:
        latex_dict["runtimeBool"] = "\\runtimetrue"
        latex_dict["objectiveType"] = "RUNTIME"
    else:
        latex_dict["runtimeBool"] = "\\runtimefalse"
        latex_dict["objectiveType"] = "QUALITY"
    if objective.minimise:
        latex_dict["minMaxAdjective"] = "lowest"
    else:
        latex_dict["minMaxAdjective"] = "highest"

    latex_dict["solver"] = config_output.solver.name
    latex_dict["instanceSetTrain"] = config_scenario.instance_set.name
    latex_dict["numInstanceInTrainingInstanceSet"] = config_scenario.instance_set.size

    latex_dict["numConfiguratorRuns"] = config_scenario.number_of_runs
    if hasattr(config_scenario, "wallclock_time"):
        latex_dict["wholeTimeBudget"] = config_scenario.wallclock_time
    elif hasattr(config_scenario, "smac3_scenario"):  # SMAC3
        latex_dict["wholeTimeBudget"] = config_scenario.smac3_scenario.walltime_limit
    else:
        latex_dict["wholeTimeBudget"] = config_scenario.max_time
    latex_dict["eachRunCutoffTime"] = config_scenario.cutoff_time

    opt_config_list = [f"{key}: {value}" for key, value in
                       config_output.best_configuration.items()]
    latex_dict["optimisedConfiguration"] = stex.list_to_latex(opt_config_list)
    latex_dict["optimisedConfigurationTrainingPerformance"] =\
        config_output.best_performance_train
    latex_dict["defaultConfigurationTrainingPerformance"] =\
        config_output.default_performance_train

    latex_dict["figure-configured-vs-default-train"] = plot_configured_vs_default(
        config_output,
        config_scenario,
        target_directory)

    # Retrieve timeout numbers for the training instances
    configured_timeouts_train, default_timeouts_train, overlapping_timeouts_train =\
        get_timeouts_instanceset(
            config_output,
            config_output.instance_set_train)

    latex_dict["timeoutsTrainDefault"] = default_timeouts_train
    latex_dict["timeoutsTrainConfigured"] = configured_timeouts_train
    latex_dict["timeoutsTrainOverlap"] = overlapping_timeouts_train
    latex_dict["ablationBool"] = get_ablation_bool(ablation)
    latex_dict["ablationPath"] = get_ablation_table(ablation)
    latex_dict["featuresBool"] = get_features_bool(
        config_scenario, config_output.solver.name, config_scenario.instance_set)

    return latex_dict



def get_dict_variable_to_value_test(
        config_output: ConfigurationOutput,
        configuration_scenario: ConfigurationScenario,
        target_dir: Path,
        ablation: AblationScenario) -> dict:
    """Return a dict matching test set specific LaTeX variables with their values.

    Args:
        config_output: Configuration output
        configuration_scenario: Configuration scenario
        target_dir: Path to where output should go
        ablation: Ablation scenario, if run

    Returns:
        A dictionary containing the variables and their values
    """
    test_dict = {"instanceSetTest": config_output.instance_set_test.name}
    test_dict["numInstanceInTestingInstanceSet"] = config_output.instance_set_test.size
    test_dict["optimisedConfigurationTestingPerformance"] =\
        config_output.best_performance_test
    test_dict["defaultConfigurationTestingPerformance"] =\
        config_output.default_performance_test

    test_dict["figure-configured-vs-default-test"] =\
        plot_configured_vs_default(
            config_output, configuration_scenario,
            target_dir, test_mode=True)

    # Retrieve timeout numbers for the testing instances
    configured_timeouts_test, default_timeouts_test, overlapping_timeouts_test =\
        get_timeouts_instanceset(config_output,
                                 config_output.instance_set_test)

    test_dict["timeoutsTestDefault"] = default_timeouts_test
    test_dict["timeoutsTestConfigured"] = configured_timeouts_test
    test_dict["timeoutsTestOverlap"] = overlapping_timeouts_test
    test_dict["ablationBool"] = get_ablation_bool(ablation)
    test_dict["ablationPath"] = get_ablation_table(ablation)
    return test_dict



def generate_report_for_configuration(config_scenario: ConfigurationScenario,
                                      config_output: ConfigurationOutput,
                                      extractor_dir: Path,
                                      target_path: Path,
                                      latex_template_path: Path,
                                      bibliography_path: Path,
                                      extractor_cuttoff: int,
                                      ablation: AblationScenario = None) -> None:
    """Generate a report for algorithm configuration.

    Args:
        config_scenario: The configuration scenario to report
        config_output: The configuration output object of the scenario
        extractor_dir: Path to the extractor used
        target_path: Where the report files will be placed.
        latex_template_path: Path to the template to use for the report
        bibliography_path: The bib file corresponding to the LaTeX template
        extractor_cuttoff: Cut off time for the extractor
        ablation: The ablation scenario if ablation was run.
    """
    target_path.mkdir(parents=True, exist_ok=True)
    variables_dict = configuration_report_variables(
        config_scenario, config_output, target_path, bibliography_path, extractor_dir,
        extractor_cuttoff, ablation)
    stex.generate_report(latex_template_path,
                         "template-Sparkle-for-configuration.tex",
                         target_path,
                         "Sparkle_Report_for_Configuration",
                         variables_dict)
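

# Editor's illustration (hedged, end-to-end): how this module is typically driven.
# The directory layout below is an assumption for illustration only; in the platform,
# config_scenario, config_output and the ablation scenario come from a completed
# configuration run rather than from paths invented here.
def _sketch_generate_report(config_scenario: ConfigurationScenario,
                            config_output: ConfigurationOutput,
                            ablation: AblationScenario) -> None:
    """Assemble the LaTeX variables and render the configuration report."""
    generate_report_for_configuration(
        config_scenario,
        config_output,
        extractor_dir=Path("Extractors"),  # hypothetical path
        target_path=Path("Output/Configuration/Report"),  # hypothetical path
        latex_template_path=Path("Templates"),  # hypothetical path
        bibliography_path=Path("Templates/report.bib"),  # hypothetical path
        extractor_cuttoff=60,  # hypothetical cutoff in seconds
        ablation=ablation)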