Coverage for sparkle/selector/extractor_cli.py: 0%

36 statements  

coverage.py v7.10.7, created at 2025-09-29 10:17 +0000

#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
"""Execute Feature Extractor for an instance, write features to FeatureDataFrame."""

import argparse
from pathlib import Path
from filelock import FileLock

from sparkle.structures import FeatureDataFrame
from sparkle.selector import Extractor


if __name__ == "__main__":
    # Define command line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--extractor", required=True, type=Path, help="path to feature extractor"
    )
    parser.add_argument(
        "--instance",
        required=True,
        type=Path,
        nargs="+",
        help="path to instance file(s) to run on",
    )
    parser.add_argument(
        "--feature-csv", required=True, type=Path, help="path to feature data CSV file"
    )
    parser.add_argument(
        "--cutoff",
        required=True,
        type=str,
        help="the maximum CPU time for the extractor.",
    )
    parser.add_argument(
        "--feature-group",
        required=False,
        type=str,
        help="the group of features to compute, if available for the "
        "extractor. If not available or provided, all groups will"
        " be computed.",
    )
    parser.add_argument(
        "--log-dir", type=Path, required=True, help="path to the log directory"
    )
    args = parser.parse_args()

    # Process command line arguments
    log_dir = args.log_dir

    # Instance argument is a list to allow for multi-file instances
    instance_path: list[Path] = args.instance
    instance_name = instance_path[0].stem
    extractor_path = args.extractor
    feature_data_csv_path = args.feature_csv
    cutoff_extractor = args.cutoff

    # Ensure stringification of path objects
    if isinstance(instance_path, list):
        instance_list = [str(filepath) for filepath in instance_path]
    else:
        instance_list = [str(instance_path)]

    extractor = Extractor(extractor_path)
    features = extractor.run(
        instance_list,
        feature_group=args.feature_group,
        cutoff_time=cutoff_extractor,
        log_dir=log_dir,
    )
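    # NOTE: judging from the write loop below, "features" is expected to be an
    # iterable of (feature_group, feature_name, value) tuples, or None on failure.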

    # Now that we have our result, we write it to the FeatureDataCSV with a FileLock
    lock = FileLock(f"{feature_data_csv_path}.lock")
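    # The lock guards the shared feature CSV; the assumption is that several
    # extractor jobs may run in parallel and write to the same file.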

    if features is not None:
        print(f"Writing features to CSV: {instance_name}, {extractor_path.name}")
        with lock.acquire(timeout=60):
            feature_data = FeatureDataFrame(feature_data_csv_path)

            instance_key = (
                instance_name
                if instance_name in feature_data.instances
                else str(instance_path[0].with_suffix(""))
            )
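            # The DataFrame may index instances either by file stem or by their
            # suffix-less path; the fallback above covers the second case.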

            for feature_group, feature_name, value in features:
                feature_data.set_value(
                    instance_key,
                    extractor_path.name,
                    feature_group,
                    feature_name,
                    float(value),
                )
            feature_data.save_csv()
        lock.release()

    else:
        print(
            "EXCEPTION while retrieving extractor results.\n"
            f"****** WARNING: Feature vector computation on instance {instance_path}"
            " failed! ******"
        )
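
For reference, a minimal sketch of how this CLI might be invoked from Python. The
paths, the cutoff value, and the use of subprocess are illustrative assumptions,
not part of the module above; only the flag names come from the argparse spec.

    import subprocess
    from pathlib import Path

    cmd = [
        "python", "sparkle/selector/extractor_cli.py",
        "--extractor", str(Path("Extractors/example_extractor")),   # hypothetical extractor path
        "--instance", str(Path("Instances/example_instance.cnf")),  # hypothetical instance file
        "--feature-csv", str(Path("Output/feature_data.csv")),      # hypothetical feature CSV
        "--cutoff", "60",                                            # cutoff is passed as a string, per the argparse spec
        "--log-dir", str(Path("Output/Logs")),                       # hypothetical log directory
    ]
    subprocess.run(cmd, check=True)  # --feature-group is optional and omitted here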