Coverage for sparkle/instance/instances.py: 100%

84 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-09-29 10:17 +0000

1"""Objects and methods relating to instances for Sparkle.""" 

2 

3from __future__ import annotations 

4from pathlib import Path 

5 

6import csv 

7import numpy as np 

8 

9 

class InstanceSet:
    """Base object representation of a set of instances.

    Keeps the target the set was created from together with two parallel
    lists: the instance names and the path(s) belonging to each instance.
    This base class leaves both lists empty.
    """

    def __init__(self: InstanceSet, target: Path | list[str | Path]) -> None:
        """Initialise an empty instance set for the given target.

        Args:
            target: The Path, or list of paths to create the instance set from.
        """
        # NOTE(review): the target is stored verbatim, so a list target ends up
        # in `directory` as well, while `name` expects a Path here.
        self.directory: Path = target
        self._instance_names: list[str] = []
        self._instance_paths: list[Path] = []

    @property
    def size(self: InstanceSet) -> int:
        """Returns the number of instances in the set."""
        return len(self._instance_paths)

    @property
    def all_paths(self: InstanceSet) -> list[Path]:
        """Returns all file paths in the instance set as a flat list."""
        return self._instance_paths

    @property
    def instance_paths(self: InstanceSet) -> list[Path]:
        """Get processed instance paths."""
        return self._instance_paths

    @property
    def instance_names(self: InstanceSet) -> list[str]:
        """Get processed instance names for instances."""
        return self._instance_names

    @property
    def instances(self: InstanceSet) -> list[str]:
        """Get the instance paths as strings with the last file suffix removed."""
        return [str(p.with_suffix("")) for p in self._instance_paths]

    @property
    def name(self: InstanceSet) -> str:
        """Get instance set name, i.e. the final component of the directory."""
        return self.directory.name

    def __str__(self: InstanceSet) -> str:
        """Get the string representation of an Instance Set."""
        return self.name

    def __repr__(self: InstanceSet) -> str:
        """Get detailed representation of an Instance Set."""
        return (
            f"{self.name}:\n"
            f"\t- Type: {type(self).__name__}\n"
            f"\t- Directory: {self.directory}\n"
            f"\t- # Instances: {self.size}"
        )

    def get_path_by_name(self: InstanceSet, name: str) -> Path | list[Path] | None:
        """Retrieve the path(s) of an instance by its name.

        Args:
            name: The instance name to look up.

        Returns:
            The paths entry of the first instance whose name equals `name`,
            or None if no instance has that name.
        """
        for idx, instance_name in enumerate(self._instance_names):
            if instance_name == name:
                return self._instance_paths[idx]
        return None

72 

73 

class FileInstanceSet(InstanceSet):
    """Instance set in which every instance is represented by one file."""

    def __init__(self: FileInstanceSet, target: Path) -> None:
        """Build the set from a single instance file or from a directory.

        Args:
            target: Either one instance file, or a directory of which every
                entry is treated as an instance.
        """
        super().__init__(target)
        # The set is named after the target itself, not after its parent
        self._name: str = target.stem
        if not target.is_file():
            # Directory case: every entry in the directory is one instance
            self._instance_paths = list(self.directory.iterdir())
            self._instance_names = [path.stem for path in self._instance_paths]
            return
        # Single file case: the set holds just this file, located in its parent
        self.directory = target.parent
        self._instance_paths = [target]
        self._instance_names = [target.stem]

    @property
    def name(self: FileInstanceSet) -> str:
        """Return the name of the set, taken from the stem of the target."""
        return self._name

100 

101 

class MultiFileInstanceSet(InstanceSet):
    """Object representation of a set of multi-file instances.

    Each row of the ``instances.csv`` file in the set's directory describes one
    instance: the first field is the instance name, the remaining fields are
    file names that are resolved against the directory.
    """

    # Name of the CSV file, located in the instance directory, listing the instances
    instance_csv = "instances.csv"

    def __init__(self: MultiFileInstanceSet, target: Path) -> None:
        """Initialise an Instances object from a directory.

        Args:
            target: Path to the instances directory. Will read from instances.csv.
                If the path is not a directory, its parent is used as the
                directory and only the rows that contain a field equal to the
                stem of the path are loaded.
        """
        is_directory = target.is_dir()
        super().__init__(target if is_directory else target.parent)
        # Path of the CSV file that lists the instances of this set
        self.instance_file = self.directory / MultiFileInstanceSet.instance_csv
        # Read through a context manager so the file handle is always closed
        with self.instance_file.open(newline="") as csv_file:
            # Blank lines are parsed as empty rows and describe no instance
            rows = [row for row in csv.reader(csv_file) if row]
        if not is_directory:
            # Single instance: keep only the rows mentioning the target's stem
            rows = [row for row in rows if target.stem in row]

        for instance_name, *file_names in rows:
            self._instance_names.append(instance_name)
            self._instance_paths.append([self.directory / f for f in file_names])

    @property
    def all_paths(self: MultiFileInstanceSet) -> list[Path]:
        """Returns all file paths in the instance set as a flat list.

        The path of the instances CSV file is appended as the last element.
        """
        return [p for instance in self._instance_paths for p in instance] + [
            self.instance_file
        ]

    @property
    def instances(self: MultiFileInstanceSet) -> list[Path]:
        """Get the instance names joined onto the directory, as Path objects."""
        return [self.directory / inst_name for inst_name in self.instance_names]

146 

147 

148class IterableFileInstanceSet(InstanceSet): 

149 """Object representation of files containing multiple instances.""" 

150 

151 supported_filetypes = set([".csv", ".npy"]) 

152 

153 def __init__(self: IterableFileInstanceSet, target: Path) -> None: 

154 """Initialise an InstanceSet from a single file. 

155 

156 Args: 

157 target: Path to the instances directory. If multiple files are found, 

158 they are assumed to have the same number of instances. 

159 """ 

160 super().__init__(target) 

161 self._instance_paths = [ 

162 p 

163 for p in self.directory.iterdir() 

164 if p.suffix in IterableFileInstanceSet.supported_filetypes 

165 ] 

166 self._size = IterableFileInstanceSet.__determine_size__(self._instance_paths[0]) 

167 self._instance_names = [p.name for p in self._instance_paths] 

168 

169 @property 

170 def size(self: IterableFileInstanceSet) -> int: 

171 """Returns the number of instances in the set.""" 

172 return self._size 

173 

174 @staticmethod 

175 def __determine_size__(file: Path) -> int: 

176 """Determine the number of instances in a file.""" 

177 match file.suffix: 

178 case ".csv": 

179 return len(file.open().readlines()) 

180 case ".npy": 

181 return len(np.load(file))