Coverage for src / sparkle / selector / extractor_cli.py: 0%
48 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-21 15:31 +0000
1#!/usr/bin/env python3
2# -*- coding: UTF-8 -*-
3"""Execute Feature Extractor for an instance, write features to FeatureDataFrame."""
5import argparse
6from pathlib import Path
7from filelock import FileLock
9from sparkle.structures import FeatureDataFrame
10from sparkle.selector import Extractor
if __name__ == "__main__":
    # Define command line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--extractor", required=True, type=Path, help="path to feature extractor"
    )
    parser.add_argument(
        "--instance",
        required=True,
        type=Path,
        nargs="+",
        help="path to instance file(s) to run on",
    )
    parser.add_argument(
        "--feature-csv", required=True, type=Path, help="path to feature data CSV file"
    )
    parser.add_argument(
        "--cutoff",
        required=True,
        type=str,
        help="the maximum CPU time for the extractor.",
    )
    parser.add_argument(
        "--feature-group",
        required=False,
        type=str,
        help="the group of features to compute, if available for the "
        "extractor. If not available or provided, all groups will"
        " be computed.",
    )
    parser.add_argument(
        "--log-dir", type=Path, required=True, help="path to the log directory"
    )
    args = parser.parse_args()

    # Process command line arguments
    log_dir: Path = args.log_dir
    # Instance argument is a list (nargs="+") to allow for multi-file instances;
    # the first file's stem is used as the canonical instance name.
    instance_path: list[Path] = args.instance
    instance_name = instance_path[0].stem
    extractor_path: Path = args.extractor
    feature_data_csv_path: Path = args.feature_csv
    cutoff_extractor: str = args.cutoff

    # Ensure stringification of path objects. nargs="+" guarantees args.instance
    # is a list, so no isinstance check is needed here.
    instance_list = [str(filepath) for filepath in instance_path]

    extractor = Extractor(extractor_path)
    if args.feature_group:
        print(
            f"Calling {extractor.name} with feature group {args.feature_group} for instance {instance_list} with cutoff {cutoff_extractor}"
        )
    else:
        print(
            f"Calling {extractor.name} for instance {instance_list} with cutoff {cutoff_extractor}"
        )

    features = extractor.run(
        instance_list,
        feature_group=args.feature_group,
        cutoff_time=cutoff_extractor,
        log_dir=log_dir,
    )

    if features is None or len(features) == 0:
        raise ValueError(
            "No features found! This may be due to a timeout. Check extractor logs."
        )

    # Group the (feature_group, feature_name, value) triples per feature group
    # so each group can be written with a single set_value call below.
    feature_data_per_group: dict[str, tuple[list, list]] = {}
    for feature_group, feature_name, value in features:
        if feature_group not in feature_data_per_group:
            feature_data_per_group[feature_group] = ([], [])
        print(
            f"{extractor_path.name} {instance_name} {feature_group} {feature_name} | {value}"
        )  # For logging purposes
        feature_data_per_group[feature_group][0].append(feature_name)
        feature_data_per_group[feature_group][1].append(float(value))

    # Now that we have our result, we write it to the FeatureDataCSV with a FileLock.
    # NOTE: features is guaranteed non-empty at this point (the ValueError above
    # fires otherwise), so no further None check is needed.
    lock = FileLock(f"{feature_data_csv_path}.lock")
    print("Writing features to file...")
    with lock.acquire(timeout=600):
        feature_data = FeatureDataFrame(feature_data_csv_path)
        # The frame may index instances by bare name or by path-without-suffix;
        # prefer whichever key already exists in the frame.
        instance_key = (
            instance_name
            if instance_name in feature_data.instances
            else str(instance_path[0].with_suffix(""))
        )
        for feature_group, (
            feature_names,
            feature_values,
        ) in feature_data_per_group.items():
            feature_data.set_value(
                instance_key,
                extractor_path.name,
                feature_group,
                feature_names,
                feature_values,
                append_write_csv=True,
            )
        # The context manager releases the lock on exit; no explicit
        # lock.release() call is required (the original's was redundant).
    print("Writing successful!")