Coverage for sparkle/solver/validator.py: 17%

1"""File containing the Validator class."""

3from __future__ import annotations

5import sys

6from pathlib import Path

7import csv

8import ast

9import runrunner as rrr

10from runrunner import Runner, Run

12from sparkle.solver import Solver

13from sparkle.instance import InstanceSet

14from sparkle.types import SparkleObjective, resolve_objective

15from sparkle.tools.runsolver_parsing import get_solver_args

class Validator():
    """Class to handle the validation of solvers on instance sets."""

    def __init__(self: Validator,
                 out_dir: Path = Path(),
                 tmp_out_dir: Path = Path()) -> None:
        """Construct the validator.

        Args:
            out_dir: Directory under which the per solver/instance set output
                directories and their validation.csv files are placed.
            tmp_out_dir: Directory passed as base_dir to runrunner when the
                validation jobs are queued.
        """
        self.out_dir = out_dir
        self.tmp_out_dir = tmp_out_dir

    def validate(self: Validator,
                 solvers: list[Path] | list[Solver] | Solver | Path,
                 configurations: list[dict] | dict | Path,
                 instance_sets: InstanceSet | list[InstanceSet],
                 objectives: list[SparkleObjective],
                 cut_off: int,
                 subdir: Path = None,
                 dependency: list[Run] | Run = None,
                 sbatch_options: list[str] = None,
                 run_on: Runner = Runner.SLURM) -> Run:
        """Validate a list of solvers (with configurations) on a set of instances.

        Args:
            solvers: list of solvers to validate. A single solver is paired with
                every configuration.
            configurations: list of configurations for each solver we validate.
                A single configuration is paired with every solver. None is
                treated as an empty configuration. A Path is passed to the
                solver as {"config_path": path}; the index of the
                solver/configuration pair is passed as seed, which
                retrieve_raw_results later uses as the line index in that file.
            instance_sets: set of instance sets on which we want to validate each
                solver. A single InstanceSet is accepted as well.
            objectives: list of objectives to validate
            cut_off: maximum run time for the solver per instance
            subdir: The subdir where to place the output in the outputdir. If None,
                a semi-unique combination of solver_instanceset is created.
            dependency: Jobs to wait for before executing the validation.
            sbatch_options: list of slurm batch options. Defaults to no options.
            run_on: whether to run on SLURM or local

        Returns:
            The Run object returned by runrunner for the queued validation jobs.
        """
        if isinstance(instance_sets, InstanceSet):
            instance_sets = [instance_sets]
        solvers_is_list = isinstance(solvers, list)
        configs_is_list = isinstance(configurations, list)
        if not solvers_is_list and not configs_is_list:
            # One solver with one configuration is a valid 1-to-1 pairing
            solvers, configurations = [solvers], [configurations]
        elif not solvers_is_list:
            # If we receive one solver but multiple configurations, we cast the
            # solver to a list of the same length
            solvers = [solvers] * len(configurations)
        elif not configs_is_list:
            # If there is only one configuration, we cast it to a list of the same
            # length as the solver list
            configurations = [configurations] * len(solvers)
        if len(configurations) != len(solvers):
            print("Error: Number of solvers and configurations does not match!")
            sys.exit(-1)
        # Ensure we have the object representation of solvers
        solvers = [Solver(s) if isinstance(s, Path) else s for s in solvers]
        cmds = []
        out_paths = []
        for index, (solver, config) in enumerate(zip(solvers, configurations)):
            if config is None:
                config = {}
            elif isinstance(config, Path):
                # Point to the config line in file
                config = {"config_path": config}
            for instance_set in instance_sets:
                if subdir is None:
                    out_path = self.out_dir / f"{solver.name}_{instance_set.name}"
                else:
                    out_path = self.out_dir / subdir
                # parents=True so a not yet existing out_dir does not abort the run
                out_path.mkdir(parents=True, exist_ok=True)
                instance_paths = instance_set._instance_paths
                cmds.extend(
                    " ".join(solver.build_cmd(instance=instance_path.absolute(),
                                              objectives=objectives,
                                              seed=index,
                                              cutoff_time=cut_off,
                                              configuration=config))
                    for instance_path in instance_paths)
                # One output path per command, in the same order as cmds
                out_paths.extend([out_path] * len(instance_paths))
        return rrr.add_to_queue(
            runner=run_on,
            cmd=cmds,
            name="validation",
            base_dir=self.tmp_out_dir,
            path=out_paths,
            dependencies=dependency,
            sbatch_options=[] if sbatch_options is None else sbatch_options,
        )

    def retrieve_raw_results(self: Validator,
                             solver: Solver,
                             instance_sets: InstanceSet | list[InstanceSet],
                             subdir: Path = None,
                             log_dir: Path = None) -> None:
        """Checks the raw results of a given solver for a specific instance_set.

        Writes the raw results to a unified CSV file for the solver/instance_set
        combination. A .rawres file (and its .val/.log siblings) is deleted once
        its entry has been written for every matching instance set; result files
        whose instance is in none of the given sets are left in place.

        Args:
            solver: The solver for which to check the raw result path
            instance_sets: The set of instances for which to retrieve the results
            subdir: Subdir where the CSV is to be placed, passed to the append method.
            log_dir: The directory to search for log files. If none, defaults to
                the log directory of the Solver.
        """
        if isinstance(instance_sets, InstanceSet):
            instance_sets = [instance_sets]
        if log_dir is None:
            log_dir = solver.raw_output_directory
        # Snapshot the directory first, as files are removed from it while looping
        raw_results = [res for res in log_dir.iterdir() if res.suffix == ".rawres"]
        for res in raw_results:
            solver_args = get_solver_args(res.with_suffix(".log"))
            solver_args = ast.literal_eval(solver_args)
            instance_path = Path(solver_args["instance"])
            if "config_path" in solver_args:
                # The actual solver configuration can be found elsewhere:
                # the seed is the line index in the configuration file
                row_idx = int(solver_args["seed"])
                config_path = Path(solver_args["config_path"])
                if not config_path.exists():
                    config_path = log_dir / config_path
                with config_path.open("r") as config_file:
                    config_str = config_file.readlines()[row_idx]
                solver_args = Solver.config_str_to_dict(config_str)
            else:
                # Remove default args
                for def_arg in ["instance", "solver_dir", "cutoff_time",
                                "seed", "objectives"]:
                    if def_arg in solver_args:
                        del solver_args[def_arg]
            solver_args = str(solver_args).replace('"', "'")

            processed = False
            for instance_set in instance_sets:
                if instance_path.name not in instance_set._instance_names:
                    continue
                out_dict = Solver.parse_solver_output(
                    "",
                    ["-o", res.name,
                     "-v", res.with_suffix(".val").name,
                     "-w", res.with_suffix(".log").name],
                    log_dir)
                self.append_entry_to_csv(solver.name,
                                         solver_args,
                                         instance_set,
                                         instance_path.name,
                                         solver_output=out_dict,
                                         subdir=subdir)
                processed = True
            if processed:
                # Only clean up after all matching sets are written, so that an
                # instance present in several sets does not hit deleted files
                res.unlink()
                res.with_suffix(".val").unlink(missing_ok=True)
                res.with_suffix(".log").unlink(missing_ok=True)

    def get_validation_results(self: Validator,
                               solver: Solver,
                               instance_set: InstanceSet,
                               source_dir: Path = None,
                               subdir: Path = None,
                               config: str | dict = None) -> list[list[str]]:
        """Query the results of the validation of solver on instance_set.

        Args:
            solver: Solver object
            instance_set: Instance set
            source_dir: Path where to look for any unprocessed output.
                By default, look in 'self.out_dir/solver.name_instanceset.name'.
            subdir: Path where to place the .csv file subdir. By default will be
                'self.outputdir/solver.name_instanceset.name/validation.csv'
            config: Configuration (string or dict) to filter the rows on if the
                solver was configured, None otherwise

        Returns:
            A list of row lists with string values, the first row being the
            header. Empty if the CSV file contains no rows.
        """
        if source_dir is None:
            source_dir = self.out_dir / f"{solver.name}_{instance_set.name}"
        # First fold any not yet processed raw results into the CSV
        if source_dir.is_dir() and any(
                x.suffix == ".rawres" for x in source_dir.iterdir()):
            self.retrieve_raw_results(
                solver, instance_set, subdir=subdir, log_dir=source_dir)
        if subdir is None:
            subdir = Path(f"{solver.name}_{instance_set.name}")
        csv_file = self.out_dir / subdir / "validation.csv"
        with csv_file.open("r", newline="") as csv_in:
            csv_data = list(csv.reader(csv_in))
        if config is None or not csv_data:
            return csv_data
        if isinstance(config, str):
            config = Solver.config_str_to_dict(config)
        header, rows = csv_data[0], csv_data[1:]
        # Keep only the rows whose configuration column equals the requested one
        matching = [row for row in rows
                    if config.items() == ast.literal_eval(row[1]).items()]
        return [header] + matching

    def append_entry_to_csv(self: Validator,
                            solver: str,
                            config_str: str,
                            instance_set: InstanceSet,
                            instance: str,
                            solver_output: dict,
                            subdir: Path = None) -> None:
        """Append a validation result as a row to a CSV file.

        The header row is written when the CSV file does not exist yet. The
        solver_output dictionary is not modified.

        Args:
            solver: Name of the solver
            config_str: The configuration of the solver, written as is
            instance_set: The instance set the instance belongs to
            instance: Name of the instance
            solver_output: The solver output. Must contain the keys "status",
                "cpu_time" and "wall_time"; of the remaining keys, those that
                resolve_objective resolves to an objective are written as
                extra columns, sorted by key.
            subdir: Subdir of out_dir in which to place the CSV. If None,
                '{solver}_{instance_set.name}' is used.
        """
        if subdir is None:
            subdir = Path(f"{solver}_{instance_set.name}")
        out_dir = self.out_dir / subdir
        out_dir.mkdir(parents=True, exist_ok=True)
        csv_file = out_dir / "validation.csv"
        status = solver_output["status"]
        cpu_time = solver_output["cpu_time"]
        wall_time = solver_output["wall_time"]
        # Work on the remaining keys without deleting from the caller's dict
        fixed_keys = ("status", "cpu_time", "wall_time")
        sorted_keys = sorted(key for key in solver_output if key not in fixed_keys)
        objectives = [resolve_objective(key) for key in sorted_keys]
        objectives = [o for o in objectives if o is not None]
        values = [solver, config_str, instance_set.name, instance, status, cpu_time,
                  wall_time] + [solver_output[o.name] for o in objectives]
        write_header = not csv_file.exists()
        # newline="" lets the csv module control the line endings itself
        with csv_file.open("a", newline="") as out:
            writer = csv.writer(out)
            if write_header:
                header = ["Solver", "Configuration", "InstanceSet", "Instance",
                          "Status", "CPU Time", "Wallclock Time"]
                writer.writerow(header + [o.name for o in objectives])
            writer.writerow(values)