Coverage for sparkle/solver/validator.py: 17%

109 statements  

« prev     ^ index     » next       coverage.py v7.6.1, created at 2024-09-27 09:10 +0000

1"""File containing the Validator class.""" 

2 

3from __future__ import annotations 

4 

5import sys 

6from pathlib import Path 

7import csv 

8import ast 

9import runrunner as rrr 

10from runrunner import Runner, Run 

11 

12from sparkle.solver import Solver 

13from sparkle.instance import InstanceSet 

14from sparkle.types import SparkleObjective, resolve_objective 

15from sparkle.tools.runsolver_parsing import get_solver_args 

16 

17 

class Validator():
    """Class to handle the validation of solvers on instance sets.

    Validation runs are queued through RunRunner; their raw output files are
    later collected into one ``validation.csv`` per output sub directory.
    """

    def __init__(self: Validator,
                 out_dir: Path = Path(),
                 tmp_out_dir: Path = Path()) -> None:
        """Construct the validator.

        Args:
            out_dir: Directory under which the per-run output directories and
                the ``validation.csv`` files are placed.
            tmp_out_dir: Directory handed to RunRunner as ``base_dir`` when
                the validation jobs are queued.
        """
        self.out_dir = out_dir
        self.tmp_out_dir = tmp_out_dir

    def validate(self: Validator,
                 solvers: list[Path] | list[Solver] | Solver | Path,
                 configurations: list[dict] | dict | Path,
                 instance_sets: list[InstanceSet],
                 objectives: list[SparkleObjective],
                 cut_off: int,
                 subdir: Path | None = None,
                 dependency: list[Run] | Run | None = None,
                 sbatch_options: list[str] | None = None,
                 run_on: Runner = Runner.SLURM) -> Run:
        """Validate a list of solvers (with configurations) on a set of instances.

        Args:
            solvers: list of solvers to validate
            configurations: list of configurations for each solver we validate.
                If a path is supplied, will use each line as a configuration.
            instance_sets: set of instance sets on which we want to validate each solver
            objectives: list of objectives to validate
            cut_off: maximum run time for the solver per instance
            subdir: The subdir where to place the output in the outputdir. If None,
                a semi-unique combination of solver_instanceset is created.
            dependency: Jobs to wait for before executing the validation.
            sbatch_options: list of slurm batch options. None means no options.
            run_on: whether to run on SLURM or local

        Returns:
            The object returned by ``runrunner.add_to_queue`` for the queued
            validation commands.
        """
        # A fresh list per call instead of a shared mutable default argument
        if sbatch_options is None:
            sbatch_options = []
        solvers_is_list = isinstance(solvers, list)
        configs_is_list = isinstance(configurations, list)
        if not solvers_is_list and not configs_is_list:
            # One solver with one configuration: wrap both so they pair up
            solvers, configurations = [solvers], [configurations]
        elif not solvers_is_list:
            # If we receive one solver but multiple configurations, we cast the
            # solver to a list of the same length
            solvers = [solvers] * len(configurations)
        elif not configs_is_list:
            # If there is only one configuration, we cast it to a list of the
            # same length as the solver list
            configurations = [configurations] * len(solvers)
        if len(configurations) != len(solvers):
            print("Error: Number of solvers and configurations does not match!")
            sys.exit(-1)
        # Ensure we have the object representation of solvers
        solvers = [Solver(s) if isinstance(s, Path) else s for s in solvers]
        cmds = []
        out_paths = []
        for index, (solver, config) in enumerate(zip(solvers, configurations)):
            if config is None:
                config = {}
            elif isinstance(config, Path):
                # Point to the config line in file. The seed (index) is read
                # back as the row number by retrieve_raw_results.
                config = {"config_path": config}
            for instance_set in instance_sets:
                if subdir is None:
                    out_path = self.out_dir / f"{solver.name}_{instance_set.name}"
                else:
                    out_path = self.out_dir / subdir
                # parents=True so a not yet existing out_dir does not break the run
                out_path.mkdir(parents=True, exist_ok=True)
                instance_paths = instance_set._instance_paths
                cmds.extend(
                    " ".join(solver.build_cmd(instance=instance_path.absolute(),
                                              objectives=objectives,
                                              seed=index,
                                              cutoff_time=cut_off,
                                              configuration=config))
                    for instance_path in instance_paths)
                # One output path entry per queued command
                out_paths.extend([out_path] * len(instance_paths))
        return rrr.add_to_queue(
            runner=run_on,
            cmd=cmds,
            name="validation",
            base_dir=self.tmp_out_dir,
            path=out_paths,
            dependencies=dependency,
            sbatch_options=sbatch_options,
        )

    def retrieve_raw_results(self: Validator,
                             solver: Solver,
                             instance_sets: InstanceSet | list[InstanceSet],
                             subdir: Path | None = None,
                             log_dir: Path | None = None) -> None:
        """Checks the raw results of a given solver for a specific instance_set.

        Writes the raw results to a unified CSV file for the solver/instance_set
        combination. The ``.rawres`` file of every processed result, together
        with its ``.val`` and ``.log`` siblings, is removed afterwards.

        Args:
            solver: The solver for which to check the raw result path
            instance_sets: The set of instances for which to retrieve the results
            subdir: Subdir where the CSV is to be placed, passed to the append method.
            log_dir: The directory to search for log files. If none, defaults to
                the log directory of the Solver.
        """
        if isinstance(instance_sets, InstanceSet):
            instance_sets = [instance_sets]
        if log_dir is None:
            log_dir = solver.raw_output_directory
        # Snapshot the raw result files first: files in this directory are
        # removed while we loop.
        raw_results = [p for p in log_dir.iterdir() if p.suffix == ".rawres"]
        for res in raw_results:
            solver_args = get_solver_args(res.with_suffix(".log"))
            solver_args = ast.literal_eval(solver_args)
            instance_path = Path(solver_args["instance"])
            if "config_path" in solver_args:
                # The actual solver configuration can be found elsewhere:
                # the seed is used as the row index into the configuration file
                row_idx = int(solver_args["seed"])
                config_path = Path(solver_args["config_path"])
                if not config_path.exists():
                    config_path = log_dir / config_path
                with config_path.open("r") as config_file:
                    config_str = config_file.readlines()[row_idx]
                solver_args = Solver.config_str_to_dict(config_str)
            else:
                # Remove default args
                for def_arg in ["instance", "solver_dir", "cutoff_time",
                                "seed", "objectives"]:
                    if def_arg in solver_args:
                        del solver_args[def_arg]
            solver_args = str(solver_args).replace('"', "'")

            for instance_set in instance_sets:
                if instance_path.name in instance_set._instance_names:
                    out_dict = Solver.parse_solver_output(
                        "",
                        ["-o", res.name,
                         "-v", res.with_suffix(".val").name,
                         "-w", res.with_suffix(".log").name],
                        log_dir)
                    self.append_entry_to_csv(solver.name,
                                             solver_args,
                                             instance_set,
                                             instance_path.name,
                                             solver_output=out_dict,
                                             subdir=subdir)
                    # The result is stored in the CSV, the raw files can go
                    res.unlink()
                    res.with_suffix(".val").unlink(missing_ok=True)
                    res.with_suffix(".log").unlink(missing_ok=True)

    def get_validation_results(self: Validator,
                               solver: Solver,
                               instance_set: InstanceSet,
                               source_dir: Path | None = None,
                               subdir: Path | None = None,
                               config: str | None = None) -> list[list[str]]:
        """Query the results of the validation of solver on instance_set.

        Args:
            solver: Solver object
            instance_set: Instance set
            source_dir: Path where to look for any unprocessed output.
                By default, 'self.out_dir/solver.name_instanceset.name'.
            subdir: Path where to place the .csv file subdir. By default will be
                'self.out_dir/solver.name_instanceset.name/validation.csv'
            config: Configuration to filter the rows on, None to return all
                rows. A string is converted with Solver.config_str_to_dict.

        Returns:
            A list of row lists with string values, the first row is the header
        """
        if source_dir is None:
            source_dir = self.out_dir / f"{solver.name}_{instance_set.name}"
        # Fold any unprocessed raw results into the CSV before reading it
        if any(x.suffix == ".rawres" for x in source_dir.iterdir()):
            self.retrieve_raw_results(
                solver, instance_set, subdir=subdir, log_dir=source_dir)
        if subdir is None:
            subdir = Path(f"{solver.name}_{instance_set.name}")
        csv_file = self.out_dir / subdir / "validation.csv"
        with csv_file.open("r", newline="") as csv_in:
            csv_data = list(csv.reader(csv_in))
        header = csv_data[0]
        if config is not None:
            # Keep only the rows whose configuration column equals the config
            if isinstance(config, str):
                config = Solver.config_str_to_dict(config)
            csv_data = [line for line in csv_data[1:] if
                        config.items() == ast.literal_eval(line[1]).items()]
            csv_data.insert(0, header)
        return csv_data

    def append_entry_to_csv(self: Validator,
                            solver: str,
                            config_str: str,
                            instance_set: InstanceSet,
                            instance: str,
                            solver_output: dict,
                            subdir: Path | None = None) -> None:
        """Append a validation result as a row to a CSV file.

        The row is written to 'self.out_dir/subdir/validation.csv'. The file,
        including its header, is created when it does not exist yet.

        Args:
            solver: Name of the solver
            config_str: The configuration of the solver for this run
            instance_set: Instance set the instance belongs to
            instance: Name of the instance
            solver_output: The parsed solver output. Must contain the keys
                "status", "cpu_time" and "wall_time". Of the remaining keys,
                those that resolve_objective maps to an objective are written
                as extra columns. The dictionary is not modified.
            subdir: Sub directory of the output directory for the CSV. If None,
                'solver_instanceset.name' is used.
        """
        if subdir is None:
            subdir = Path(f"{solver}_{instance_set.name}")
        out_dir = self.out_dir / subdir
        out_dir.mkdir(parents=True, exist_ok=True)
        csv_file = out_dir / "validation.csv"
        status = solver_output["status"]
        cpu_time = solver_output["cpu_time"]
        wall_time = solver_output["wall_time"]
        # All other keys are candidate objectives. They are filtered rather
        # than deleted so the dictionary of the caller stays intact.
        fixed_keys = {"status", "cpu_time", "wall_time"}
        objective_keys = sorted(
            key for key in solver_output if key not in fixed_keys)
        objectives = [resolve_objective(key) for key in objective_keys]
        objectives = [o for o in objectives if o is not None]
        write_header = not csv_file.exists()
        # newline="" as the csv module requires for its file objects
        with csv_file.open("a", newline="") as out:
            writer = csv.writer(out)
            if write_header:
                writer.writerow(
                    ["Solver", "Configuration", "InstanceSet", "Instance",
                     "Status", "CPU Time", "Wallclock Time"]
                    + [o.name for o in objectives])
            writer.writerow(
                [solver, config_str, instance_set.name, instance, status,
                 cpu_time, wall_time]
                + [solver_output[o.name] for o in objectives])