Coverage for src/sparkle/solver/solver_cli.py: 74%
76 statements
coverage.py v7.10.7, created at 2025-10-15 14:11 +0000
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
"""Run a solver, read/write to performance dataframe."""

import sys
from filelock import FileLock
import argparse
from pathlib import Path
import random
import time

from runrunner import Runner

from sparkle.solver import Solver
from sparkle.types import resolve_objective
from sparkle.structures import PerformanceDataFrame
from sparkle.tools.solver_wrapper_parsing import parse_commandline_dict
def main(argv: list[str]) -> None:
    """Main function of the command."""
    # Define command line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--performance-dataframe",
        required=True,
        type=Path,
        help="path to the performance dataframe",
    )
    parser.add_argument("--solver", required=True, type=Path, help="path to solver")
    parser.add_argument(
        "--instance",
        required=True,
        type=Path,
        nargs="+",
        help="path to instance to run on",
    )
    parser.add_argument(
        "--run-index",
        required=True,
        type=int,
        help="run index in the dataframe to set.",
    )
    parser.add_argument(
        "--log-dir", type=Path, required=True, help="path to the log directory"
    )

    # These two arguments should be mutually exclusive
    parser.add_argument(
        "--configuration-id",
        type=str,
        required=False,
        help="configuration id to read from the PerformanceDataFrame.",
    )
    parser.add_argument(
        "--configuration",
        type=str,
        nargs="+",
        required=False,
        help="configuration for the solver",
    )

    parser.add_argument(
        "--seed",
        type=int,
        required=False,
        help="seed to use for the solver. If not provided, generates one.",
    )
    parser.add_argument(
        "--cutoff-time",
        type=int,
        required=False,
        help="the cutoff time for the solver.",
    )
    parser.add_argument(
        "--objectives",
        type=str,
        required=False,
        nargs="+",
        help="The objectives to evaluate the Solver on. If not provided, "
        "they are read from the PerformanceDataFrame.",
    )
    parser.add_argument(
        "--target-objective",
        required=False,
        type=str,
        help="The objective to use to determine the best configuration.",
    )
    parser.add_argument(
        "--best-configuration-instances",
        required=False,
        type=str,
        nargs="+",
        help="If given, any given configuration is ignored and the best found"
        " configuration over the given instances is determined instead."
        " Uses the 'target-objective' given in the arguments, or the first"
        " objective given by the dataframe, to determine the best configuration.",
    )
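    # Illustrative invocation of this script (the paths below are hypothetical
    # examples, not taken from the repository):
    #   python solver_cli.py \
    #       --performance-dataframe Output/performance_data.csv \
    #       --solver Solvers/MySolver \
    #       --instance Instances/example_instance.cnf \
    #       --run-index 0 \
    #       --log-dir Output/logs \
    #       --cutoff-time 60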
    args = parser.parse_args(argv)
    # Process command line arguments
    log_dir = args.log_dir
    print(f"Running Solver and reading/writing results with {args.performance_dataframe}")
    # Resolve possible multi-file instance
    instance_path: list[Path] = args.instance
    # If the instance is only one file then we don't need a list
    instance_path = instance_path[0] if len(instance_path) == 1 else instance_path
    instance_name = (
        instance_path.stem if isinstance(instance_path, Path) else instance_path[0].stem
    )
    run_index = args.run_index
    # Ensure stringification of path objects
    if isinstance(instance_path, list):
        # Double list because of solver.run
        run_instances = [[str(filepath) for filepath in instance_path]]
    else:
        run_instances = str(instance_path)

    solver = Solver(args.solver)
    # By default, run the default configuration
    config_id = PerformanceDataFrame.default_configuration
    configuration = None
    # If no seed is provided via the CLI, generate one (explicit None check so a seed of 0 is kept)
    seed = args.seed if args.seed is not None else random.randint(0, 2**32 - 1)
    # Parse the provided objectives if present
    objectives = (
        [resolve_objective(o) for o in args.objectives] if args.objectives else None
    )
    if args.configuration:  # Configuration provided, override
        configuration = parse_commandline_dict(args.configuration)
        config_id = configuration["configuration_id"]
    elif (
        (
            args.configuration_id
            and args.configuration_id != PerformanceDataFrame.default_configuration
        )
        or args.best_configuration_instances
        or not objectives
    ):  # Read from the PerformanceDataFrame, which can be slow
        # Desynchronize from other possible jobs writing to the same file
        print(
            "Reading from the PerformanceDataFrame... "
            f"[{'configuration' if (args.configuration_id or args.best_configuration_instances) else ''} "
            f"{'objectives' if not objectives else ''}]"
        )
        time.sleep(random.random() * 10)
        lock = FileLock(f"{args.performance_dataframe}.lock")  # Lock the file
        with lock.acquire(timeout=600):
            performance_dataframe = PerformanceDataFrame(args.performance_dataframe)

        if not objectives:
            objectives = performance_dataframe.objectives
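        # The dataframe loaded above serves two purposes: supplying objectives that
        # were not passed on the command line (handled here) and resolving the
        # configuration to run (handled in the branches below).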
        if args.best_configuration_instances:  # Determine best configuration
            best_configuration_instances: list[str] = args.best_configuration_instances
            # Get the unique instance names
            best_configuration_instances = list(
                set([Path(instance).stem for instance in best_configuration_instances])
            )
            target_objective = (
                resolve_objective(args.target_objective)
                if args.target_objective
                else objectives[0]
            )
            config_id, _ = performance_dataframe.best_configuration(
                solver=str(args.solver),
                objective=target_objective,
                instances=best_configuration_instances,
            )
            configuration = performance_dataframe.get_full_configuration(
                str(args.solver), config_id
            )
        elif (
            args.configuration_id
        ):  # Read the configuration from the PerformanceDataFrame using the ID
            config_id = args.configuration_id
            configuration = performance_dataframe.get_full_configuration(
                str(args.solver), config_id
            )
182 print(f"Running Solver {solver} on instance {instance_name} with seed {seed}..")
183 solver_output = solver.run(
184 run_instances,
185 objectives=objectives,
186 seed=seed,
187 configuration=configuration.copy() if configuration else None,
188 cutoff_time=args.cutoff_time,
189 log_dir=log_dir,
190 run_on=Runner.LOCAL,
191 )
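    # solver_output is expected to map each objective name to the value measured for
    # this run; it is indexed by objective.name when building the results below.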
    # Prepare the results for the DataFrame for each objective
    result = [
        [solver_output[objective.name] for objective in objectives],
        [seed] * len(objectives),
    ]
    solver_fields = [
        PerformanceDataFrame.column_value,
        PerformanceDataFrame.column_seed,
    ]
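    # Each row of result corresponds to an entry of solver_fields: the objective
    # values first, then the seed used, with one column per objective.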
203 print(f"For Solver/config: {solver}/{config_id}")
204 print(f"For index: Instance {instance_name}, Run {args.run_index}, Seed {seed}")
205 print("Appending the following objective values:") # {', '.join(objective_values)}")
206 for objective in objectives:
207 print(
208 f"{objective.name}, {instance_name}, {args.run_index} | {args.solver}, {config_id}: {solver_output[objective.name]}"
209 )
    # Desynchronize from other possible jobs writing to the same file
    time.sleep(random.random() * 100)
    # TESTLOG
    # lock = FileLock("test.log.lock")
    # with lock.acquire(timeout=600):
    #     with Path("test.log").open("a") as f:
    #         for objective in objectives:
    #             f.write(f"{objective.name}, {instance_name}, {args.run_index} | {solver} {config_id}: {solver_output[objective.name]}, {seed}\n")
    # Now that we have all the results, we can add them to the performance dataframe
    lock = FileLock(f"{args.performance_dataframe}.lock")  # Lock the file
    with lock.acquire(timeout=600):
        performance_dataframe = PerformanceDataFrame(args.performance_dataframe)
        performance_dataframe.set_value(
            result,
            solver=str(args.solver),
            instance=instance_name,
            configuration=config_id,
            objective=[o.name for o in objectives],
            run=run_index,
            solver_fields=solver_fields,
            # With this argument the values are appended to the CSV, so the whole
            # PerformanceDataFrame does not need to be saved here
            append_write_csv=True,
        )


if __name__ == "__main__":
    main(sys.argv[1:])