Coverage for sparkle/instance/instances.py: 86%

79 statements  

« prev     ^ index     » next       coverage.py v7.6.4, created at 2024-11-05 14:48 +0000

1"""Objects and methods relating to instances for Sparkle.""" 

2from __future__ import annotations 

3from pathlib import Path 

4 

5import csv 

6import numpy as np 

7 

8 

class InstanceSet:
    """Base object representation of a set of instances."""

    def __init__(self: InstanceSet, target: Path | list[str, Path]) -> None:
        """Initialise an empty instance set for the given target.

        The base class only stores the target and creates empty name/path lists.

        Args:
            target: The Path, or list of paths to create the instance set from.
        """
        # NOTE: the raw target is stored as-is, so this holds a list (not a Path)
        # when a list target is passed and nothing reassigns it afterwards.
        self.directory: Path = target
        self._instance_names: list[str] = []
        self._instance_paths: list[Path] = []

    @property
    def size(self: InstanceSet) -> int:
        """Returns the number of instances in the set."""
        return len(self._instance_paths)

    @property
    def all_paths(self: InstanceSet) -> list[Path]:
        """Returns all file paths in the instance set as a flat list."""
        return self._instance_paths

    @property
    def instance_paths(self: InstanceSet) -> list[Path]:
        """Get processed instance paths."""
        return self._instance_paths

    @property
    def instance_names(self: InstanceSet) -> list[str]:
        """Get processed instance names for multi-file instances."""
        return self._instance_names

    @property
    def name(self: InstanceSet) -> str:
        """Get instance set name (the final component of the directory)."""
        return self.directory.name

    def get_path_by_name(self: InstanceSet, name: str) -> Path | list[Path] | None:
        """Retrieve the path(s) of an instance by its name.

        Args:
            name: The instance name to look up.

        Returns:
            The entry of the instance paths stored at the same index as the
            first matching name, or None if no instance has that name.
        """
        for idx, instance_name in enumerate(self._instance_names):
            if instance_name == name:
                return self._instance_paths[idx]
        return None

53 

54 

class FileInstanceSet(InstanceSet):
    """A set of instances in which every instance is exactly one file."""

    def __init__(self: FileInstanceSet, target: Path) -> None:
        """Build the set from either a single file or a directory.

        Args:
            target: Path to a single instance file, or to a directory of which
                every entry is treated as one instance.
        """
        super().__init__(target)
        # A directory is named after itself, anything else after its stem.
        if target.is_dir():
            self._name: str = target.name
        else:
            self._name = target.stem

        if not target.is_file():
            # Default situation: every entry of the directory is an instance.
            entries = list(target.iterdir())
            self.directory = target
            self._instance_paths = entries
            self._instance_names = [entry.name for entry in entries]
            return

        # Single instance set: the file is the only instance and its parent
        # becomes the directory of the set.
        self.directory = target.parent
        self._instance_paths = [target]
        self._instance_names = [target.name]

    @property
    def name(self: FileInstanceSet) -> str:
        """Get instance set name."""
        return self._name

82 

83 

class MultiFileInstanceSet(InstanceSet):
    """Object representation of a set of multi-file instances."""
    instance_csv = "instances.csv"

    def __init__(self: MultiFileInstanceSet, target: Path | list[str, Path]) -> None:
        """Initialise an Instances object from a directory or a single instance.

        Args:
            target: Path to the instances directory. Will read from instances.csv
                in that directory, where every row is [name, file1, file2, ...].
                If target is a list of [name, path1, path2, ...], create an
                instance set containing only that one instance.

        Raises:
            TypeError: If target is neither a list nor a Path.
        """
        super().__init__(target)
        if isinstance(target, list):
            # A single instance represented as a list of [name, path1, path2, ...].
            # Wrap it so the loop below sees one instance row, instead of
            # iterating over the individual elements of that instance.
            instance_list = [target]
            target = target[1].parent
        elif isinstance(target, Path):
            # A path pointing to the directory of instances
            instance_file = target / MultiFileInstanceSet.instance_csv
            # Read from instance_file, closing the handle afterwards. Blank
            # rows carry no instance name and are skipped.
            with instance_file.open(newline="") as csv_file:
                instance_list = [row for row in csv.reader(csv_file) if row]
        else:
            raise TypeError(
                "target must be a Path or a list of [name, path, ...], "
                f"not {type(target).__name__}")

        self.directory = target if target.is_dir() else target.parent
        self._instance_names, self._instance_paths = [], []
        for instance in instance_list:
            self._instance_names.append(instance[0])
            # File names given as strings are relative to the set directory,
            # Path objects are used as they are.
            self._instance_paths.append([
                (self.directory / f) if isinstance(f, str) else f
                for f in instance[1:]])

    @property
    def all_paths(self: MultiFileInstanceSet) -> list[Path]:
        """Returns all file paths in the instance set as a flat list.

        The path of the instance csv in the set directory is appended at the end.
        """
        return [p for instance in self._instance_paths for p in instance] + [
            self.directory / MultiFileInstanceSet.instance_csv]

118 

119 

120class IterableFileInstanceSet(InstanceSet): 

121 """Object representation of files containing multiple instances.""" 

122 supported_filetypes = set([".csv", ".npy"]) 

123 

124 def __init__(self: IterableFileInstanceSet, target: Path) -> None: 

125 """Initialise an InstanceSet from a single file. 

126 

127 Args: 

128 target: Path to the instances directory. If multiple files are found, 

129 they are assumed to have the same number of instances. 

130 """ 

131 super().__init__(target) 

132 self.directory = target 

133 self._instance_paths =\ 

134 [p for p in self.directory.iterdir() 

135 if p.suffix in IterableFileInstanceSet.supported_filetypes] 

136 self._size = len(self._instance_paths[0].open().readlines()) 

137 self._instance_names = [p.name for p in self._instance_paths] 

138 

139 @property 

140 def size(self: IterableFileInstanceSet) -> int: 

141 """Returns the number of instances in the set.""" 

142 return self._size 

143 

144 @staticmethod 

145 def __determine_size__(file: Path) -> int: 

146 """Determine the number of instances in a file.""" 

147 match file.suffix: 

148 case ".csv": 

149 return len(file.open().readlines()) 

150 case ".npy": 

151 return len(np.load(file))