Coverage for sparkle/solver/ablation.py: 33%

#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
"""Helper functions for ablation analysis."""
from __future__ import annotations
import re
import shutil
import decimal
from pathlib import Path

import runrunner as rrr
from runrunner.base import Runner, Run

from sparkle.CLI.help import global_variables as gv
from sparkle.CLI.help import logging as sl
from sparkle.configurator.implementations import SMAC2
from sparkle.platform import CommandName
from sparkle.solver import Solver
from sparkle.instance import InstanceSet

class AblationScenario:
    """Class for ablation analysis.

    Bundles a solver with a training (and optionally a test) instance set and
    manages the scenario directory that the ablation executables work in.
    """

    # Seed passed to the ablation executable. The same number is part of the
    # log file names referenced below, so every use shares this constant.
    _SEED: int = 1234

    # A float written in E notation that stands alone as a token: it must be
    # delimited by whitespace, a quote character or the start/end of the string.
    _SCIENTIFIC_FLOAT = re.compile(
        r"""(?<![^\s'"])[+-]?(?:\d+\.?\d*|\.\d+)[eE][+-]?\d+(?![^\s'"])""")

    def __init__(self: AblationScenario,
                 solver: Solver,
                 train_set: InstanceSet,
                 test_set: InstanceSet | None,
                 output_dir: Path,
                 ablation_executable: Path | None = None,
                 ablation_validation_executable: Path | None = None,
                 override_dirs: bool = False) -> None:
        """Initialize ablation scenario.

        Creates the scenario directory ``<output_dir>/<scenario name>`` together
        with its ``tmp`` and ``validation/tmp`` sub directories.

        Args:
            solver: Solver object
            train_set: The training instance set
            test_set: The test instance set, or None when there is none
            output_dir: The output directory
            ablation_executable: (Only for execution) The ablation executable
            ablation_validation_executable: (Only for execution) The validation exec
            override_dirs: Whether to clean the scenario directory if it already exists
        """
        self.ablation_exec = ablation_executable
        self.ablation_validation_exec = ablation_validation_executable
        self.solver = solver
        self.train_set = train_set
        self.test_set = test_set
        self.output_dir = output_dir

        # The scenario is named after the solver and the instance set(s) used
        self.scenario_name = f"{self.solver.name}_{self.train_set.name}"
        if self.test_set is not None:
            self.scenario_name += f"_{self.test_set.name}"
        self.scenario_dir = self.output_dir / self.scenario_name
        if override_dirs and self.scenario_dir.exists():
            print("Warning: found existing ablation scenario. This will be removed.")
            shutil.rmtree(self.scenario_dir)

        # Create required scenario directories
        self.tmp_dir = self.scenario_dir / "tmp"
        self.tmp_dir.mkdir(parents=True, exist_ok=True)

        self.validation_dir = self.scenario_dir / "validation"
        self.validation_dir_tmp = self.validation_dir / "tmp"
        self.validation_dir_tmp.mkdir(parents=True, exist_ok=True)
        self.table_file = (self.validation_dir / "log"
                           / f"ablation-validation-run{self._SEED}.txt")

    @staticmethod
    def _expand_scientific_notation(config_str: str) -> str:
        """Rewrite every E notation float in a configuration string as fixed point.

        Only stand-alone numeric tokens are rewritten; values that merely contain
        the letter "e" (such as 'true') or embed a number in a longer word are
        left untouched.

        Args:
            config_str: Configuration string of the form "-name value ..."

        Returns:
            The configuration string without E notation floats.
        """
        ctx = decimal.Context(prec=16)

        def to_fixed_point(match: re.Match) -> str:
            return format(ctx.create_decimal(float(match.group())), "f")

        return AblationScenario._SCIENTIFIC_FLOAT.sub(to_fixed_point, config_str)

    def create_configuration_file(self: AblationScenario) -> None:
        """Create the configuration files for ablation analysis.

        Writes ``ablation_config.txt`` into the scenario directory, and a copy
        whose ``execdir`` points at the validation tmp directory into the
        validation directory. The target configuration is the configurator's
        optimal configuration for the solver on the training set, supplemented
        with PCS defaults for parameters it does not mention.

        Returns:
            None
        """
        settings = gv.settings()
        objective = settings.get_general_sparkle_objectives()[0]
        configurator = settings.get_general_sparkle_configurator()
        _, opt_config_str = configurator.get_optimal_configuration(
            self.solver, self.train_set, objective=objective)

        # We need to check which params are missing and supplement with default
        # values. The flag is matched as a whole token so that a parameter is not
        # considered present just because its name is a substring of another
        # parameter name or of a value.
        for param in self.solver.get_pcs():
            name = param["name"]
            flag = rf"(?:^|\s)-{re.escape(name)}(?:\s|$)"
            if re.search(flag, opt_config_str) is None:
                opt_config_str += f" -{name} {param['default']}"

        # Ablation cannot deal with E scientific notation in floats
        opt_config_str = self._expand_scientific_notation(opt_config_str)

        smac_run_obj = SMAC2.get_smac_run_obj(objective)
        objective_str = "MEAN10" if smac_run_obj == "RUNTIME" else "MEAN"
        run_cutoff_time = settings.get_general_target_cutoff_time()
        run_cutoff_length = settings.get_configurator_target_cutoff_length()
        concurrent_clis = settings.get_slurm_max_parallel_runs_per_node()
        ablation_racing = settings.get_ablation_racing_flag()
        pcs_file_path = f"{self.solver.get_pcs_file().absolute()}"  # Get Solver PCS

        # Create config file
        config_file = self.scenario_dir / "ablation_config.txt"
        execdir_line = f"execdir = {self.tmp_dir.absolute()}\n"
        config = (f'algo = "{configurator.configurator_target.absolute()} '
                  f'{self.solver.directory.absolute()} {objective}"\n'
                  f"{execdir_line}"
                  "experimentDir = ./\n"
                  f"deterministic = {1 if self.solver.deterministic else 0}\n"
                  f"run_obj = {smac_run_obj}\n"
                  f"overall_obj = {objective_str}\n"
                  f"cutoffTime = {run_cutoff_time}\n"
                  f"cutoff_length = {run_cutoff_length}\n"
                  f"cli-cores = {concurrent_clis}\n"
                  f"useRacing = {ablation_racing}\n"
                  f"seed = {self._SEED}\n"
                  f"paramfile = {pcs_file_path}\n"
                  "instance_file = instances_train.txt\n"
                  "test_instance_file = instances_test.txt\n"
                  "sourceConfiguration=DEFAULT\n"
                  f'targetConfiguration="{opt_config_str}"')
        # write_text closes the file handle, unlike a bare open().write()
        config_file.write_text(config)

        # Write config to validation directory, executing in its own tmp dir
        conf_valid = config.replace(
            execdir_line, f"execdir = {self.validation_dir_tmp.absolute()}\n")
        (self.validation_dir / config_file.name).write_text(conf_valid)

    def create_instance_file(self: AblationScenario, test: bool = False) -> None:
        """Create an instance file for ablation analysis.

        Writes ``instances_train.txt`` (or ``instances_test.txt`` when ``test``
        is set) into the scenario directory and copies it to the validation
        directory.

        Args:
            test: Whether to write the test instance file. When there is no test
                set, the training set is written to the test file instead.
        """
        file_suffix = "_train.txt"
        instance_set = self.train_set
        if test:
            file_suffix = "_test.txt"
            if self.test_set is not None:
                instance_set = self.test_set

        # We give the Ablation script the paths of the instances
        file_instance = self.scenario_dir / f"instances{file_suffix}"
        with file_instance.open("w") as fh:
            for instance in instance_set._instance_paths:
                if isinstance(instance, list):
                    # A multi file instance goes on one line, space separated
                    joined_instances = " ".join(
                        str(file.absolute()) for file in instance)
                    fh.write(f"{joined_instances}\n")
                else:
                    fh.write(f"{instance.absolute()}\n")

        # Copy to validation directory
        shutil.copyfile(file_instance, self.validation_dir / file_instance.name)

    def check_for_ablation(self: AblationScenario) -> bool:
        """Checks if ablation has terminated successfully.

        Returns:
            True if the validation table file exists and its first line reports
            that the ablation analysis validation is complete.
        """
        if not self.table_file.is_file():
            return False
        # First line in the table file should be "Ablation analysis validation complete."
        with self.table_file.open() as table:
            first_line = table.readline().strip()
        return first_line == "Ablation analysis validation complete."

    def read_ablation_table(self: AblationScenario) -> list[list[str]]:
        """Read from ablation table of a scenario.

        Returns:
            A list of rows, the first being the header row. Each row is a list
            of five strings. Empty if no completed ablation table exists.
        """
        if not self.check_for_ablation():
            # No ablation table exists for this solver-instance pair
            return []
        results = [["Round", "Flipped parameter", "Source value", "Target value",
                    "Validation result"]]

        with self.table_file.open() as table:
            for line in table:
                # Pre-process lines from the ablation file and add them as rows.
                # Sometimes ablation rounds switch multiple parameters at once.
                # EXAMPLE: 2 EDR, EDRalpha 0, 0.1 1, 0.1013241633106732 486.31691
                # To split the row correctly, we remove the space after the commas
                # of such lists, split on spaces, and then add the space back.
                values = re.sub(r"\s+", " ", line.strip())
                values = values.replace(", ", ",")
                values = [val.replace(",", ", ") for val in values.split(" ")]
                # Lines that do not have exactly five columns are not table rows
                if len(values) == 5:
                    results.append(values)
        return results

    def submit_ablation(self: AblationScenario,
                        run_on: Runner = Runner.SLURM) -> list[Run]:
        """Submit an ablation job.

        Args:
            run_on: Determines to which RunRunner queue the job is added

        Returns:
            A list of the submitted Run objects: the ablation run, followed by
            the ablation validation run if the scenario has a test set. When
            running locally, each run is waited on before it is added.
        """
        # 1. submit the ablation to the runrunner queue
        clis = gv.settings().get_slurm_max_parallel_runs_per_node()
        cmd = f"{self.ablation_exec.absolute()} --optionFile ablation_config.txt"
        srun_options = ["-N1", "-n1", f"-c{clis}"]
        sbatch_options = [f"--cpus-per-task={clis}"] +\
            gv.settings().get_slurm_extra_options(as_args=True)

        run_ablation = rrr.add_to_queue(
            runner=run_on,
            cmd=cmd,
            name=CommandName.RUN_ABLATION,
            base_dir=sl.caller_log_dir,
            path=self.scenario_dir,
            sbatch_options=sbatch_options,
            srun_options=srun_options)

        runs = []
        if run_on == Runner.LOCAL:
            run_ablation.wait()
        runs.append(run_ablation)

        # 2. Run ablation validation run if we have a test set to run on
        if self.test_set is not None:
            # Validation dir should have a copy of all needed files, except for the
            # output of the ablation run, which is stored in ablation-run[seed].txt
            cmd = f"{self.ablation_validation_exec.absolute()} "\
                  "--optionFile ablation_config.txt "\
                  f"--ablationLogFile ../log/ablation-run{self._SEED}.txt"

            run_ablation_validation = rrr.add_to_queue(
                runner=run_on,
                cmd=cmd,
                name=CommandName.RUN_ABLATION_VALIDATION,
                path=self.validation_dir,
                base_dir=sl.caller_log_dir,
                dependencies=run_ablation,
                sbatch_options=sbatch_options,
                srun_options=srun_options)

            if run_on == Runner.LOCAL:
                run_ablation_validation.wait()
            runs.append(run_ablation_validation)

        return runs