Coverage for src / sparkle / selector / extractor_cli.py: 0%

48 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-21 15:31 +0000

1#!/usr/bin/env python3 

2# -*- coding: UTF-8 -*- 

3"""Execute Feature Extractor for an instance, write features to FeatureDataFrame.""" 

4 

5import argparse 

6from pathlib import Path 

7from filelock import FileLock 

8 

9from sparkle.structures import FeatureDataFrame 

10from sparkle.selector import Extractor 

11 

12 

if __name__ == "__main__":
    # Script entry point: run a feature extractor on a single (possibly
    # multi-file) instance and write the resulting features to the feature
    # data CSV.
    #
    # Raises:
    #     ValueError: if the extractor returns no features (None or empty).

    # Define command line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--extractor", required=True, type=Path, help="path to feature extractor"
    )
    parser.add_argument(
        "--instance",
        required=True,
        type=Path,
        nargs="+",
        help="path to instance file(s) to run on",
    )
    parser.add_argument(
        "--feature-csv", required=True, type=Path, help="path to feature data CSV file"
    )
    parser.add_argument(
        "--cutoff",
        required=True,
        type=str,
        help="the maximum CPU time for the extractor.",
    )
    parser.add_argument(
        "--feature-group",
        required=False,
        type=str,
        help="the group of features to compute, if available for the "
        "extractor. If not available or provided, all groups will"
        " be computed.",
    )
    parser.add_argument(
        "--log-dir", type=Path, required=True, help="path to the log directory"
    )
    args = parser.parse_args()

    # Process command line arguments
    log_dir = args.log_dir
    extractor_path = args.extractor
    feature_data_csv_path = args.feature_csv
    cutoff_extractor = args.cutoff

    # The instance argument is a list (nargs="+") to allow for multi-file
    # instances; the stem of the first file is used as the instance name.
    instance_path: list[Path] = args.instance
    instance_name = instance_path[0].stem

    # Ensure stringification of path objects
    instance_list = [str(filepath) for filepath in instance_path]

    extractor = Extractor(extractor_path)
    # Only mention the feature group in the announcement when one was requested.
    group_info = (
        f" with feature group {args.feature_group}" if args.feature_group else ""
    )
    print(
        f"Calling {extractor.name}{group_info} for instance {instance_list} "
        f"with cutoff {cutoff_extractor}"
    )

    features = extractor.run(
        instance_list,
        feature_group=args.feature_group,
        cutoff_time=cutoff_extractor,
        log_dir=log_dir,
    )

    if features is None or len(features) == 0:
        raise ValueError(
            "No features found! This may be due to a timeout. Check extractor logs."
        )

    # Collect the (group, name, value) triples per feature group as a pair of
    # parallel lists: feature names and their values converted to float.
    feature_data_per_group = {}
    for feature_group, feature_name, value in features:
        names, values = feature_data_per_group.setdefault(feature_group, ([], []))
        print(
            f"{extractor_path.name} {instance_name} {feature_group} "
            f"{feature_name} | {value}"
        )  # For logging purposes
        names.append(feature_name)
        values.append(float(value))

    # Now that we have our result, we write it to the FeatureDataCSV with a
    # FileLock. The `with` statement releases the lock when the block exits
    # (also when writing raises), so no explicit release() call is needed.
    lock = FileLock(f"{feature_data_csv_path}.lock")
    print("Writing features to file...")
    with lock.acquire(timeout=600):
        feature_data = FeatureDataFrame(feature_data_csv_path)
        # Use the bare instance name if the data frame already lists it,
        # otherwise fall back to the first instance path without its suffix.
        instance_key = (
            instance_name
            if instance_name in feature_data.instances
            else str(instance_path[0].with_suffix(""))
        )
        for feature_group, (
            feature_names,
            feature_values,
        ) in feature_data_per_group.items():
            feature_data.set_value(
                instance_key,
                extractor_path.name,
                feature_group,
                feature_names,
                feature_values,
                append_write_csv=True,
            )
    print("Writing successful!")

62 instance_list = [str(instance_path)] 

63 

64 extractor = Extractor(extractor_path) 

65 if args.feature_group: 

66 print( 

67 f"Calling {extractor.name} with feature group {args.feature_group} for instance {instance_list} with cutoff {cutoff_extractor}" 

68 ) 

69 else: 

70 print( 

71 f"Calling {extractor.name} for instance {instance_list} with cutoff {cutoff_extractor}" 

72 ) 

73 

74 features = extractor.run( 

75 instance_list, 

76 feature_group=args.feature_group, 

77 cutoff_time=cutoff_extractor, 

78 log_dir=log_dir, 

79 ) 

80 

81 if features is None or len(features) == 0: 

82 raise ValueError( 

83 "No features found! This may be due to a timeout. Check extractor logs." 

84 ) 

85 

86 feature_data_per_group = {} 

87 for feature_group, feature_name, value in features: 

88 if feature_group not in feature_data_per_group: 

89 feature_data_per_group[feature_group] = [[], []] 

90 print( 

91 f"{extractor_path.name} {instance_name} {feature_group} {feature_name} | {value}" 

92 ) # For logging purposes 

93 feature_data_per_group[feature_group][0] += [feature_name] 

94 feature_data_per_group[feature_group][1] += [float(value)] 

95 

96 # Now that we have our result, we write it to the FeatureDataCSV with a FileLock 

97 lock = FileLock(f"{feature_data_csv_path}.lock") 

98 if features is not None: 

99 print("Writing features to file...") 

100 with lock.acquire(timeout=600): 

101 feature_data = FeatureDataFrame(feature_data_csv_path) 

102 instance_key = ( 

103 instance_name 

104 if instance_name in feature_data.instances 

105 else str(instance_path[0].with_suffix("")) 

106 ) 

107 for feature_group, ( 

108 feature_names, 

109 feature_values, 

110 ) in feature_data_per_group.items(): 

111 # for feature_group, feature_name, value in features: 

112 feature_data.set_value( 

113 instance_key, 

114 extractor_path.name, 

115 feature_group, 

116 feature_names, 

117 feature_values, 

118 append_write_csv=True, 

119 ) 

120 lock.release() 

121 print("Writing successful!") 

122 else: 

123 print( 

124 "EXCEPTION during retrieving extractor results.\n" 

125 f"****** WARNING: Feature vector computation on instance {instance_path}" 

126 " failed! ******" 

127 )