Coverage for sparkle/solver/solver_cli.py: 74%
76 statements
« prev ^ index » next coverage.py v7.10.7, created at 2025-09-29 10:17 +0000
1#!/usr/bin/env python3
2# -*- coding: UTF-8 -*-
3"""Run a solver, read/write to performance dataframe."""
5import sys
6from filelock import FileLock
7import argparse
8from pathlib import Path
9import random
10import time
12from runrunner import Runner
14from sparkle.solver import Solver
15from sparkle.types import resolve_objective
16from sparkle.structures import PerformanceDataFrame
17from sparkle.tools.solver_wrapper_parsing import parse_commandline_dict
def _build_parser() -> argparse.ArgumentParser:
    """Create the argument parser for the solver CLI.

    Returns:
        Configured ArgumentParser with all solver-run options.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--performance-dataframe",
        required=True,
        type=Path,
        help="path to the performance dataframe",
    )
    parser.add_argument("--solver", required=True, type=Path, help="path to solver")
    parser.add_argument(
        "--instance",
        required=True,
        type=Path,
        nargs="+",
        help="path to instance to run on",
    )
    parser.add_argument(
        "--run-index",
        required=True,
        type=int,
        help="run index in the dataframe to set.",
    )
    parser.add_argument(
        "--log-dir", type=Path, required=True, help="path to the log directory"
    )
    # These two arguments should be mutually exclusive
    parser.add_argument(
        "--configuration-id",
        type=str,
        required=False,
        help="configuration id to read from the PerformanceDataFrame.",
    )
    parser.add_argument(
        "--configuration",
        type=str,
        nargs="+",
        required=False,
        help="configuration for the solver",
    )
    parser.add_argument(
        "--seed",
        type=int,
        required=False,
        help="seed to use for the solver. If not provided, generates one.",
    )
    parser.add_argument(
        "--cutoff-time",
        type=int,
        required=False,
        help="the cutoff time for the solver.",
    )
    parser.add_argument(
        "--objectives",
        type=str,
        required=False,
        nargs="+",
        help="The objectives to evaluate to Solver on. If not provided, read from the PerformanceDataFrame.",
    )
    parser.add_argument(
        "--target-objective",
        required=False,
        type=str,
        help="The objective to use to determine the best configuration.",
    )
    parser.add_argument(
        "--best-configuration-instances",
        required=False,
        type=str,
        nargs="+",
        help="If given, will ignore any given configurations, and try to"
        " determine the best found configurations over the given "
        "instances. Uses the 'target-objective' given in the arguments"
        " or the first one given by the dataframe to determine the best"
        "configuration.",
    )
    return parser


def main(argv: list[str]) -> None:
    """Run a solver on an instance and record results in the PerformanceDataFrame.

    Resolves the configuration to run (explicit, by id, best-found, or default),
    runs the solver locally, and writes one value per objective back to the
    (file-locked) performance dataframe.

    Args:
        argv: Command line arguments, excluding the program name.
    """
    args = _build_parser().parse_args(argv)
    # Process command line arguments
    log_dir = args.log_dir
    print(f"Running Solver and read/writing results with {args.performance_dataframe}")
    # Resolve possible multi-file instance
    instance_path: list[Path] = args.instance
    # If instance is only one file then we don't need a list
    instance_path = instance_path[0] if len(instance_path) == 1 else instance_path
    instance_name = (
        instance_path.stem if isinstance(instance_path, Path) else instance_path[0].stem
    )
    run_index = args.run_index
    # Ensure stringification of path objects
    if isinstance(instance_path, list):
        # Double list because of solver.run
        run_instances = [[str(filepath) for filepath in instance_path]]
    else:
        run_instances = str(instance_path)

    solver = Solver(args.solver)
    # By default, run the default configuration
    config_id = PerformanceDataFrame.default_configuration
    configuration = None
    # If no seed is provided by CLI, generate one.
    # Compare against None so an explicit "--seed 0" is honoured (0 is falsy).
    seed = args.seed if args.seed is not None else random.randint(0, 2**32 - 1)
    # Parse the provided objectives if present
    objectives = (
        [resolve_objective(o) for o in args.objectives] if args.objectives else None
    )

    if args.configuration:  # Configuration provided, override
        configuration = parse_commandline_dict(args.configuration)
        config_id = configuration["configuration_id"]
    elif (
        args.configuration_id or args.best_configuration_instances or not objectives
    ):  # Read from PerformanceDataFrame, can be slow
        # Desynchronize from other possible jobs writing to the same file
        print(
            "Reading from Performance DataFrame.. "
            f"[{'configuration' if (args.configuration_id or args.best_configuration_instances) else ''} "
            f"{'objectives' if not objectives else ''}]"
        )
        time.sleep(random.random() * 10)
        lock = FileLock(f"{args.performance_dataframe}.lock")  # Lock the file
        with lock.acquire(timeout=600):
            performance_dataframe = PerformanceDataFrame(args.performance_dataframe)

        if not objectives:
            objectives = performance_dataframe.objectives

        if args.best_configuration_instances:  # Determine best configuration
            best_configuration_instances: list[str] = args.best_configuration_instances
            # Get the unique instance names
            best_configuration_instances = list(
                {Path(instance).stem for instance in best_configuration_instances}
            )
            target_objective = (
                resolve_objective(args.target_objective)
                if args.target_objective
                else objectives[0]
            )
            config_id, _ = performance_dataframe.best_configuration(
                solver=str(args.solver),
                objective=target_objective,
                instances=best_configuration_instances,
            )
            configuration = performance_dataframe.get_full_configuration(
                str(args.solver), config_id
            )
        elif (
            args.configuration_id
        ):  # Read from PerformanceDataFrame the configuration using the ID
            config_id = args.configuration_id
            configuration = performance_dataframe.get_full_configuration(
                str(args.solver), config_id
            )

    print(f"Running Solver {solver} on instance {instance_name} with seed {seed}..")
    solver_output = solver.run(
        run_instances,
        objectives=objectives,
        seed=seed,
        # Copy so the solver cannot mutate the configuration we later report
        configuration=configuration.copy() if configuration else None,
        cutoff_time=args.cutoff_time,
        log_dir=log_dir,
        run_on=Runner.LOCAL,
    )

    # Prepare the results for the DataFrame for each objective
    result = [
        [solver_output[objective.name] for objective in objectives],
        [seed] * len(objectives),
    ]
    solver_fields = [
        PerformanceDataFrame.column_value,
        PerformanceDataFrame.column_seed,
    ]

    print(f"For Solver/config: {solver}/{config_id}")
    print(f"For index: Instance {instance_name}, Run {args.run_index}, Seed {seed}")
    print("Appending the following objective values:")
    for objective in objectives:
        print(
            f"{objective.name}, {instance_name}, {args.run_index} | {args.solver}, {config_id}: {solver_output[objective.name]}"
        )

    # Desynchronize from other possible jobs writing to the same file
    time.sleep(random.random() * 100)
    # Now that we have all the results, we can add them to the performance dataframe
    lock = FileLock(f"{args.performance_dataframe}.lock")  # Lock the file
    with lock.acquire(timeout=600):
        performance_dataframe = PerformanceDataFrame(args.performance_dataframe)
        performance_dataframe.set_value(
            result,
            solver=str(args.solver),
            instance=instance_name,
            configuration=config_id,
            objective=[o.name for o in objectives],
            run=run_index,
            solver_fields=solver_fields,
            append_write_csv=True,  # We do not have to save the PDF here, thanks to this argument
        )
if __name__ == "__main__":
    # Script entry point: forward CLI arguments (without the program name) to main.
    main(sys.argv[1:])