Coverage for sparkle/instance/instances.py: 99%

84 statements  

« prev     ^ index     » next       coverage.py v7.9.1, created at 2025-07-01 13:21 +0000

1"""Objects and methods relating to instances for Sparkle.""" 

2from __future__ import annotations 

3from pathlib import Path 

4 

5import csv 

6import numpy as np 

7 

8 

class InstanceSet:
    """Base object representation of a set of instances."""

    def __init__(self: InstanceSet, target: Path | list[str | Path]) -> None:
        """Initialise an InstanceSet without any registered instances.

        The instance names and paths start out empty; the subclasses in this
        module populate them in their own constructors.

        Args:
            target: The Path, or list of paths to create the instance set from.
                It is stored unchanged as the directory of the set.
        """
        self.directory: Path = target
        self._instance_names: list[str] = []
        self._instance_paths: list[Path] = []

    @property
    def size(self: InstanceSet) -> int:
        """Returns the number of instances in the set."""
        return len(self._instance_paths)

    @property
    def all_paths(self: InstanceSet) -> list[Path]:
        """Returns all file paths in the instance set as a flat list."""
        return self._instance_paths

    @property
    def instance_paths(self: InstanceSet) -> list[Path]:
        """Get processed instance paths."""
        return self._instance_paths

    @property
    def instance_names(self: InstanceSet) -> list[str]:
        """Get processed instance names for instances."""
        return self._instance_names

    @property
    def instances(self: InstanceSet) -> list[str]:
        """Get the instance paths as strings with the last suffix removed."""
        return [str(p.with_suffix("")) for p in self._instance_paths]

    @property
    def name(self: InstanceSet) -> str:
        """Get instance set name, i.e. the final component of the directory."""
        return self.directory.name

    def __str__(self: InstanceSet) -> str:
        """Get the string representation of an Instance Set."""
        return self.name

    def __repr__(self: InstanceSet) -> str:
        """Get detailed representation of an Instance Set."""
        return f"{self.name}:\n"\
               f"\t- Type: {type(self).__name__}\n"\
               f"\t- Directory: {self.directory}\n"\
               f"\t- # Instances: {self.size}"

    def get_path_by_name(self: InstanceSet, name: str) -> Path | list[Path] | None:
        """Retrieve the path entry of the first instance with the given name.

        Args:
            name: The instance name to look up.

        Returns:
            The entry stored in the instance paths for the first instance whose
            name equals ``name``, or None if there is no such instance.
        """
        # Names and paths are parallel lists, so walk them in lockstep
        for instance_name, instance_path in zip(self._instance_names,
                                                self._instance_paths):
            if instance_name == name:
                return instance_path
        return None

69 

70 

class FileInstanceSet(InstanceSet):
    """Object representation of a set of single-file instances."""

    def __init__(self: FileInstanceSet, target: Path) -> None:
        """Initialise an InstanceSet, where each instance is a file in the directory.

        Args:
            target: Path to a single instance file, or to the instances directory.
                For a file, the set holds only that file and its parent becomes
                the directory of the set. Otherwise every entry of the directory
                is treated as one instance.
        """
        super().__init__(target)
        # The set name is taken from the target, also when target is a single file
        self._name: str = target.stem
        if target.is_file():
            # Single instance set
            self._instance_paths = [target]
            self._instance_names = [target.stem]
            self.directory = target.parent
        else:
            # Default situation, treat each file in the directory as an instance
            self._instance_paths = list(self.directory.iterdir())
            self._instance_names = [p.stem for p in self._instance_paths]

    @property
    def name(self: FileInstanceSet) -> str:
        """Get instance set name."""
        return self._name

97 

98 

class MultiFileInstanceSet(InstanceSet):
    """Object representation of a set of multi-file instances."""
    instance_csv = "instances.csv"

    def __init__(self: MultiFileInstanceSet, target: Path) -> None:
        """Initialise an Instances object from a directory.

        Args:
            target: Path to the instances directory, or to a single file in it.
                Will read from instances.csv in that directory. For a single
                file only the rows that contain the stem of the file are used.
        """
        single_file = not target.is_dir()
        super().__init__(target.parent if single_file else target)
        # The CSV file with, per row, an instance name followed by its files
        self.instance_file = self.directory / MultiFileInstanceSet.instance_csv
        # Read inside a context manager so the file handle is always closed;
        # newline="" is what the csv module expects of the file object
        with self.instance_file.open(newline="") as csv_file:
            # Blank lines are parsed as empty rows and carry no instance
            instance_list = [row for row in csv.reader(csv_file) if row]
        if single_file:
            instance_list = [row for row in instance_list if target.stem in row]

        for instance_name, *instance_files in instance_list:
            self._instance_names.append(instance_name)
            # csv.reader yields strings, resolve them against the directory
            self._instance_paths.append(
                [self.directory / f for f in instance_files])

    @property
    def all_paths(self: MultiFileInstanceSet) -> list[Path]:
        """Returns all file paths in the instance set as a flat list.

        The path of the instances CSV file is appended as the last element.
        """
        flat_paths = [p for instance in self._instance_paths for p in instance]
        return flat_paths + [self.instance_file]

    @property
    def instances(self: MultiFileInstanceSet) -> list[Path]:
        """Get instance names with relative path for multi-file instances.

        Note that the elements are Path objects (directory / instance name),
        not strings as in the base class.
        """
        return [self.directory / inst_name for inst_name in self.instance_names]

137 

138 

139class IterableFileInstanceSet(InstanceSet): 

140 """Object representation of files containing multiple instances.""" 

141 supported_filetypes = set([".csv", ".npy"]) 

142 

143 def __init__(self: IterableFileInstanceSet, target: Path) -> None: 

144 """Initialise an InstanceSet from a single file. 

145 

146 Args: 

147 target: Path to the instances directory. If multiple files are found, 

148 they are assumed to have the same number of instances. 

149 """ 

150 super().__init__(target) 

151 self._instance_paths =\ 

152 [p for p in self.directory.iterdir() 

153 if p.suffix in IterableFileInstanceSet.supported_filetypes] 

154 self._size = IterableFileInstanceSet.__determine_size__(self._instance_paths[0]) 

155 self._instance_names = [p.name for p in self._instance_paths] 

156 

157 @property 

158 def size(self: IterableFileInstanceSet) -> int: 

159 """Returns the number of instances in the set.""" 

160 return self._size 

161 

162 @staticmethod 

163 def __determine_size__(file: Path) -> int: 

164 """Determine the number of instances in a file.""" 

165 match file.suffix: 

166 case ".csv": 

167 return len(file.open().readlines()) 

168 case ".npy": 

169 return len(np.load(file))