Coverage for src/sparkle/solver/solver_cli.py: 74%
76 statements
coverage.py v7.10.7, created at 2025-10-15 14:11 +0000
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
"""Run a solver, read/write to performance dataframe."""

import sys
from filelock import FileLock
import argparse
from pathlib import Path
import random
import time

from runrunner import Runner

from sparkle.solver import Solver
from sparkle.types import resolve_objective
from sparkle.structures import PerformanceDataFrame
from sparkle.tools.solver_wrapper_parsing import parse_commandline_dict
def main(argv: list[str]) -> None:
    """Main function of the command."""
    # Define command line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--performance-dataframe",
        required=True,
        type=Path,
        help="path to the performance dataframe",
    )
    parser.add_argument("--solver", required=True, type=Path, help="path to solver")
    parser.add_argument(
        "--instance",
        required=True,
        type=Path,
        nargs="+",
        help="path to instance to run on",
    )
    parser.add_argument(
        "--run-index",
        required=True,
        type=int,
        help="run index in the dataframe to set.",
    )
    parser.add_argument(
        "--log-dir", type=Path, required=True, help="path to the log directory"
    )

    # These two arguments should be mutually exclusive
    parser.add_argument(
        "--configuration-id",
        type=str,
        required=False,
        help="configuration id to read from the PerformanceDataFrame.",
    )
    parser.add_argument(
        "--configuration",
        type=str,
        nargs="+",
        required=False,
        help="configuration for the solver",
    )

    parser.add_argument(
        "--seed",
        type=int,
        required=False,
        help="seed to use for the solver. If not provided, generates one.",
    )
    parser.add_argument(
        "--cutoff-time",
        type=int,
        required=False,
        help="the cutoff time for the solver.",
    )
    parser.add_argument(
        "--objectives",
        type=str,
        required=False,
        nargs="+",
        help="The objectives to evaluate the Solver on. If not provided, "
        "they are read from the PerformanceDataFrame.",
    )
    parser.add_argument(
        "--target-objective",
        required=False,
        type=str,
        help="The objective to use to determine the best configuration.",
    )
    parser.add_argument(
        "--best-configuration-instances",
        required=False,
        type=str,
        nargs="+",
        help="If given, any given configuration is ignored and the best found"
        " configuration over the given instances is determined instead."
        " Uses the 'target-objective' given in the arguments, or the first"
        " objective given by the dataframe, to determine the best configuration.",
    )
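    # Illustrative invocation of this script (the paths below are hypothetical
    # examples, not taken from the repository):
    #   python solver_cli.py \
    #       --performance-dataframe Output/performance_data.csv \
    #       --solver Solvers/MySolver \
    #       --instance Instances/example_instance.cnf \
    #       --run-index 0 \
    #       --log-dir Output/logs \
    #       --cutoff-time 60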
    args = parser.parse_args(argv)
    # Process command line arguments
    log_dir = args.log_dir
    print(f"Running Solver and reading/writing results with {args.performance_dataframe}")
    # Resolve possible multi-file instance
    instance_path: list[Path] = args.instance
    # If the instance is only one file then we don't need a list
    instance_path = instance_path[0] if len(instance_path) == 1 else instance_path
    instance_name = (
        instance_path.stem if isinstance(instance_path, Path) else instance_path[0].stem
    )
    run_index = args.run_index
    # Ensure stringification of path objects
    if isinstance(instance_path, list):
        # Double list because of solver.run
        run_instances = [[str(filepath) for filepath in instance_path]]
    else:
        run_instances = str(instance_path)

    solver = Solver(args.solver)
    # By default, run the default configuration
    config_id = PerformanceDataFrame.default_configuration
    configuration = None
    # If no seed is provided via the CLI, generate one (explicit None check so a seed of 0 is kept)
    seed = args.seed if args.seed is not None else random.randint(0, 2**32 - 1)
    # Parse the provided objectives if present
    objectives = (
        [resolve_objective(o) for o in args.objectives] if args.objectives else None
    )
    if args.configuration:  # Configuration provided, override
        configuration = parse_commandline_dict(args.configuration)
        config_id = configuration["configuration_id"]
    elif (
        (
            args.configuration_id
            and args.configuration_id != PerformanceDataFrame.default_configuration
        )
        or args.best_configuration_instances
        or not objectives
    ):  # Read from the PerformanceDataFrame, which can be slow
        # Desynchronize from other possible jobs writing to the same file
        print(
            "Reading from the PerformanceDataFrame... "
            f"[{'configuration' if (args.configuration_id or args.best_configuration_instances) else ''} "
            f"{'objectives' if not objectives else ''}]"
        )
        time.sleep(random.random() * 10)
        lock = FileLock(f"{args.performance_dataframe}.lock")  # Lock the file
        with lock.acquire(timeout=600):
            performance_dataframe = PerformanceDataFrame(args.performance_dataframe)

        if not objectives:
            objectives = performance_dataframe.objectives
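        # The dataframe loaded above serves two purposes: supplying objectives that
        # were not passed on the command line (handled here) and resolving the
        # configuration to run (handled in the branches below).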
        if args.best_configuration_instances:  # Determine best configuration
            best_configuration_instances: list[str] = args.best_configuration_instances
            # Get the unique instance names
            best_configuration_instances = list(
                set([Path(instance).stem for instance in best_configuration_instances])
            )
            target_objective = (
                resolve_objective(args.target_objective)
                if args.target_objective
                else objectives[0]
            )
            config_id, _ = performance_dataframe.best_configuration(
                solver=str(args.solver),
                objective=target_objective,
                instances=best_configuration_instances,
            )
            configuration = performance_dataframe.get_full_configuration(
                str(args.solver), config_id
            )
        elif (
            args.configuration_id
        ):  # Read the configuration from the PerformanceDataFrame using the ID
            config_id = args.configuration_id
            configuration = performance_dataframe.get_full_configuration(
                str(args.solver), config_id
            )
182 print(f"Running Solver {solver} on instance {instance_name} with seed {seed}..")
183 solver_output = solver.run(
184 run_instances,
185 objectives=objectives,
186 seed=seed,
187 configuration=configuration.copy() if configuration else None,
188 cutoff_time=args.cutoff_time,
189 log_dir=log_dir,
190 run_on=Runner.LOCAL,
191 )
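    # solver_output is expected to map each objective name to the value measured for
    # this run; it is indexed by objective.name when building the results below.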
    # Prepare the results for the DataFrame for each objective
    result = [
        [solver_output[objective.name] for objective in objectives],
        [seed] * len(objectives),
    ]
    solver_fields = [
        PerformanceDataFrame.column_value,
        PerformanceDataFrame.column_seed,
    ]
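    # Each row of result corresponds to an entry of solver_fields: the objective
    # values first, then the seed used, with one column per objective.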
203 print(f"For Solver/config: {solver}/{config_id}")
204 print(f"For index: Instance {instance_name}, Run {args.run_index}, Seed {seed}")
205 print("Appending the following objective values:") # {', '.join(objective_values)}")
206 for objective in objectives:
207 print(
208 f"{objective.name}, {instance_name}, {args.run_index} | {args.solver}, {config_id}: {solver_output[objective.name]}"
209 )
    # Desynchronize from other possible jobs writing to the same file
    time.sleep(random.random() * 100)
    # TESTLOG
    # lock = FileLock("test.log.lock")
    # with lock.acquire(timeout=600):
    #     with Path("test.log").open("a") as f:
    #         for objective in objectives:
    #             f.write(f"{objective.name}, {instance_name}, {args.run_index} | {solver} {config_id}: {solver_output[objective.name]}, {seed}\n")
    # Now that we have all the results, we can add them to the performance dataframe
    lock = FileLock(f"{args.performance_dataframe}.lock")  # Lock the file
    with lock.acquire(timeout=600):
        performance_dataframe = PerformanceDataFrame(args.performance_dataframe)
        performance_dataframe.set_value(
            result,
            solver=str(args.solver),
            instance=instance_name,
            configuration=config_id,
            objective=[o.name for o in objectives],
            run=run_index,
            solver_fields=solver_fields,
            # With this argument the values are appended to the CSV, so the whole
            # PerformanceDataFrame does not need to be saved here
            append_write_csv=True,
        )


if __name__ == "__main__":
    main(sys.argv[1:])