Coverage for sparkle/instance/instances.py: 78%

81 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2025-01-07 15:22 +0000

1"""Objects and methods relating to instances for Sparkle.""" 

2from __future__ import annotations 

3from pathlib import Path 

4 

5import csv 

6import numpy as np 

7 

8 

class InstanceSet:
    """Common base class describing a collection of instances."""

    def __init__(self: InstanceSet, target: Path | list[str, Path]) -> None:
        """Create an empty instance set bound to the given target.

        Args:
            target: The Path, or list of paths, the instance set is created from.
                It is stored unchanged as the directory attribute.
        """
        # Names and paths are kept as parallel lists, matched by position.
        self._instance_paths: list[Path] = []
        self._instance_names: list[str] = []
        self.directory: Path = target

    @property
    def size(self: InstanceSet) -> int:
        """The number of instance paths held by the set."""
        return len(self._instance_paths)

    @property
    def all_paths(self: InstanceSet) -> list[Path]:
        """Every file path belonging to the instance set, as a flat list."""
        return self._instance_paths

    @property
    def instance_paths(self: InstanceSet) -> list[Path]:
        """The instance paths collected for this set."""
        return self._instance_paths

    @property
    def instance_names(self: InstanceSet) -> list[str]:
        """The instance names collected for this set."""
        return self._instance_names

    @property
    def name(self: InstanceSet) -> str:
        """The name of the instance set, taken from its directory."""
        return self.directory.name

    def __str__(self: InstanceSet) -> str:
        """The instance set is represented by its name."""
        return self.name

    def get_path_by_name(self: InstanceSet, name: str) -> Path | list[Path]:
        """Look up the path(s) stored for the first instance called name.

        Args:
            name: The instance name to search for.

        Returns:
            The entry of the instance paths at the position of the first matching
            name, or None when no instance has that name.
        """
        matching_positions = (
            position
            for position, known_name in enumerate(self._instance_names)
            if known_name == name
        )
        first_match = next(matching_positions, None)
        if first_match is None:
            return None
        return self._instance_paths[first_match]

57 

58 

class FileInstanceSet(InstanceSet):
    """An instance set in which every instance is exactly one file."""

    def __init__(self: FileInstanceSet, target: Path) -> None:
        """Build the set from a directory of files or from one single file.

        Args:
            target: Path to a directory, in which case every entry of the
                directory becomes an instance, or path to a single file, in which
                case the set contains only that file.
        """
        super().__init__(target)
        self.directory: Path = target
        # A directory lends its full name to the set, a file only its stem.
        if target.is_dir():
            self._name: str = target.name
        else:
            self._name = target.stem

        if not self.directory.is_file():
            # Default situation: each entry of the directory is an instance.
            self._instance_paths = list(self.directory.iterdir())
            self._instance_names = [path.name for path in self._instance_paths]
            return

        # Single instance set: the file is the only instance and its parent
        # folder takes over as the directory of the set.
        single_file = self.directory
        self._instance_paths = [single_file]
        self._instance_names = [single_file.name]
        self.directory = single_file.parent

    @property
    def name(self: FileInstanceSet) -> str:
        """The name of the instance set as determined at construction."""
        return self._name

86 

87 

class MultiFileInstanceSet(InstanceSet):
    """Object representation of a set of multi-file instances."""

    # Name of the CSV file, looked up inside the instance directory, in which
    # each row is read as: instance name, file1, file2, ...
    instance_csv = "instances.csv"

    def __init__(self: MultiFileInstanceSet, target: Path | list[str, Path]) -> None:
        """Initialise an instance set from a directory or from a single instance.

        Args:
            target: Either a Path to the instances directory, in which case the
                instances are read from the instances.csv file inside it, or a
                list of [name, path1, path2, ...] describing one instance, in
                which case an instance set of one is created.

        Raises:
            TypeError: If target is neither a list nor a Path.
        """
        super().__init__(target)
        if isinstance(target, list):
            # A single instance represented as a list of [name, path1, path2, ...].
            # Wrap it so the loop below sees one instance, rather than iterating
            # over the name and the paths of that instance individually.
            instance_list = [target]
            target = target[1].parent
        elif isinstance(target, Path):
            # A path pointing to the directory of instances
            instance_file = target / MultiFileInstanceSet.instance_csv
            # newline="" is what the csv module asks for; the with-block makes
            # sure the file handle is closed again after reading.
            with instance_file.open(newline="") as csv_file:
                # Blank lines are parsed as empty rows and carry no instance.
                instance_list = [row for row in csv.reader(csv_file) if row]
        else:
            raise TypeError(
                "target must be a Path or a list of [name, path, ...], "
                f"not {type(target).__name__}")

        self.directory = target if target.is_dir() else target.parent
        self._instance_names, self._instance_paths = [], []
        for instance in instance_list:
            # First field is the instance name, the remaining ones are its files.
            self._instance_names.append(instance[0])
            # File entries given as strings are resolved against the directory,
            # anything else is stored as it was given.
            self._instance_paths.append([
                (self.directory / f) if isinstance(f, str) else f for f in instance[1:]])

    @property
    def all_paths(self: MultiFileInstanceSet) -> list[Path]:
        """Returns all file paths in the instance set as a flat list.

        The path of the instances.csv file in the set directory is appended
        after the files of all instances.
        """
        return [p for instance in self._instance_paths for p in instance] + [
            self.directory / MultiFileInstanceSet.instance_csv]

122 

123 

124class IterableFileInstanceSet(InstanceSet): 

125 """Object representation of files containing multiple instances.""" 

126 supported_filetypes = set([".csv", ".npy"]) 

127 

128 def __init__(self: IterableFileInstanceSet, target: Path) -> None: 

129 """Initialise an InstanceSet from a single file. 

130 

131 Args: 

132 target: Path to the instances directory. If multiple files are found, 

133 they are assumed to have the same number of instances. 

134 """ 

135 super().__init__(target) 

136 self.directory = target 

137 self._instance_paths =\ 

138 [p for p in self.directory.iterdir() 

139 if p.suffix in IterableFileInstanceSet.supported_filetypes] 

140 self._size = len(self._instance_paths[0].open().readlines()) 

141 self._instance_names = [p.name for p in self._instance_paths] 

142 

143 @property 

144 def size(self: IterableFileInstanceSet) -> int: 

145 """Returns the number of instances in the set.""" 

146 return self._size 

147 

148 @staticmethod 

149 def __determine_size__(file: Path) -> int: 

150 """Determine the number of instances in a file.""" 

151 match file.suffix: 

152 case ".csv": 

153 return len(file.open().readlines()) 

154 case ".npy": 

155 return len(np.load(file))