Coverage for sparkle/instance/instances.py: 100%
84 statements
« prev ^ index » next coverage.py v7.10.7, created at 2025-09-29 10:17 +0000
« prev ^ index » next coverage.py v7.10.7, created at 2025-09-29 10:17 +0000
1"""Objects and methods relating to instances for Sparkle."""
3from __future__ import annotations
4from pathlib import Path
6import csv
7import numpy as np
class InstanceSet:
    """Base object representation of a set of instances.

    Subclasses are responsible for filling ``_instance_names`` and
    ``_instance_paths``; the two lists are index-aligned, i.e. the name at
    position ``i`` belongs to the path(s) at position ``i``.
    """

    def __init__(self: InstanceSet, target: Path | list[str, Path]) -> None:
        """Initialise an Instances object from a directory.

        Args:
            target: The Path, or list of paths to create the instance set from.
        """
        # Stored as given; subclasses may re-point this at a parent directory.
        self.directory: Path = target
        self._instance_names: list[str] = []
        self._instance_paths: list[Path] = []

    @property
    def size(self: InstanceSet) -> int:
        """Returns the number of instances in the set."""
        return len(self._instance_paths)

    @property
    def all_paths(self: InstanceSet) -> list[Path]:
        """Returns all file paths in the instance set as a flat list."""
        return self._instance_paths

    @property
    def instance_paths(self: InstanceSet) -> list[Path]:
        """Get processed instance paths."""
        return self._instance_paths

    @property
    def instance_names(self: InstanceSet) -> list[str]:
        """Get processed instance names for instances."""
        return self._instance_names

    @property
    def instances(self: InstanceSet) -> list[str]:
        """Get the instance paths as strings with their file suffix removed."""
        return [str(p.with_suffix("")) for p in self._instance_paths]

    @property
    def name(self: InstanceSet) -> str:
        """Get instance set name, i.e. the final component of the directory."""
        return self.directory.name

    def __str__(self: InstanceSet) -> str:
        """Get the string representation of an Instance Set."""
        return self.name

    def __repr__(self: InstanceSet) -> str:
        """Get detailed representation of an Instance Set."""
        return (
            f"{self.name}:\n"
            f"\t- Type: {type(self).__name__}\n"
            f"\t- Directory: {self.directory}\n"
            f"\t- # Instances: {self.size}"
        )

    def get_path_by_name(
        self: InstanceSet, name: str
    ) -> Path | list[Path] | None:
        """Retrieve the path(s) of an instance by its name.

        Args:
            name: The instance name to look up.

        Returns:
            The entry of the instance paths stored at the position of the
            first matching name, or None if no instance has that name.
        """
        try:
            idx = self._instance_names.index(name)
        except ValueError:
            # Unknown name: signal failure with None rather than raising.
            return None
        return self._instance_paths[idx]
class FileInstanceSet(InstanceSet):
    """Instance set in which every instance is exactly one file."""

    def __init__(self: FileInstanceSet, target: Path) -> None:
        """Build the set from a single instance file or from a directory.

        Args:
            target: Either one instance file, or a directory of which every
                entry is taken to be one instance. The stem of this path
                becomes the name of the set.
        """
        super().__init__(target)
        self._name: str = target.stem

        if not target.is_file():
            # Directory case: one instance per directory entry.
            entries = list(self.directory.iterdir())
            self._instance_paths = entries
            self._instance_names = [entry.stem for entry in entries]
            return

        # Single-file case: the set holds just this file and lives in its parent.
        self.directory = target.parent
        self._instance_names = [target.stem]
        self._instance_paths = [target]

    @property
    def name(self: FileInstanceSet) -> str:
        """Name of the set: the stem of the path it was created from."""
        return self._name
class MultiFileInstanceSet(InstanceSet):
    """Object representation of a set of multi-file instances."""

    # File inside the instance directory listing one instance per row:
    # first column the instance name, remaining columns its files.
    instance_csv = "instances.csv"

    def __init__(self: MultiFileInstanceSet, target: Path) -> None:
        """Initialise an Instances object from a directory.

        Args:
            target: Path to the instances directory, or to a single instance
                inside it. In the latter case the parent directory is used and
                only rows of instances.csv containing the stem of target as a
                field are loaded.
        """
        target_is_dir = target.is_dir()
        super().__init__(target if target_is_dir else target.parent)
        # The CSV file describing the instances of this directory
        self.instance_file = self.directory / MultiFileInstanceSet.instance_csv

        # Read inside a context manager so the file handle is always closed;
        # newline="" is what the csv module expects from its input file.
        # Blank lines come back as empty rows and carry no instance: skip them.
        with self.instance_file.open(newline="") as csv_file:
            rows = [row for row in csv.reader(csv_file) if row]

        if not target_is_dir:
            # Single instance: keep only the rows that mention it
            rows = [row for row in rows if target.stem in row]

        for instance_name, *instance_files in rows:
            self._instance_names.append(instance_name)
            # csv.reader yields strings only; resolve them against the directory
            self._instance_paths.append(
                [self.directory / file_name for file_name in instance_files]
            )

    @property
    def all_paths(self: MultiFileInstanceSet) -> list[Path]:
        """Returns all file paths in the instance set as a flat list.

        The instances.csv file itself is appended as the last element.
        """
        return [p for instance in self._instance_paths for p in instance] + [
            self.instance_file
        ]

    @property
    def instances(self: MultiFileInstanceSet) -> list[Path]:
        """Get instance names with relative path for multi-file instances."""
        return [self.directory / inst_name for inst_name in self.instance_names]
148class IterableFileInstanceSet(InstanceSet):
149 """Object representation of files containing multiple instances."""
151 supported_filetypes = set([".csv", ".npy"])
153 def __init__(self: IterableFileInstanceSet, target: Path) -> None:
154 """Initialise an InstanceSet from a single file.
156 Args:
157 target: Path to the instances directory. If multiple files are found,
158 they are assumed to have the same number of instances.
159 """
160 super().__init__(target)
161 self._instance_paths = [
162 p
163 for p in self.directory.iterdir()
164 if p.suffix in IterableFileInstanceSet.supported_filetypes
165 ]
166 self._size = IterableFileInstanceSet.__determine_size__(self._instance_paths[0])
167 self._instance_names = [p.name for p in self._instance_paths]
169 @property
170 def size(self: IterableFileInstanceSet) -> int:
171 """Returns the number of instances in the set."""
172 return self._size
174 @staticmethod
175 def __determine_size__(file: Path) -> int:
176 """Determine the number of instances in a file."""
177 match file.suffix:
178 case ".csv":
179 return len(file.open().readlines())
180 case ".npy":
181 return len(np.load(file))