Coverage for sparkle/instance/instances.py: 99%
84 statements
« prev ^ index » next coverage.py v7.9.1, created at 2025-07-01 13:21 +0000
« prev ^ index » next coverage.py v7.9.1, created at 2025-07-01 13:21 +0000
1"""Objects and methods relating to instances for Sparkle."""
2from __future__ import annotations
3from pathlib import Path
5import csv
6import numpy as np
class InstanceSet:
    """Base object representation of a set of instances.

    The base class only stores the target and two empty, index-aligned lists
    (``_instance_names`` and ``_instance_paths``); subclasses are expected to
    populate them.
    """

    def __init__(self: InstanceSet, target: Path | list[str, Path]) -> None:
        """Initialise an Instances object from a directory.

        Args:
            target: The Path, or list of paths to create the instance set from.
        """
        # Stored as-is. NOTE(review): `name` reads `self.directory.name`, so it
        # only works when target is a Path-like object — confirm list usage.
        self.directory: Path = target
        self._instance_names: list[str] = []
        self._instance_paths: list[Path] = []

    @property
    def size(self: InstanceSet) -> int:
        """Returns the number of instances in the set."""
        return len(self._instance_paths)

    @property
    def all_paths(self: InstanceSet) -> list[Path]:
        """Returns all file paths in the instance set as a flat list."""
        return self._instance_paths

    @property
    def instance_paths(self: InstanceSet) -> list[Path]:
        """Get processed instance paths."""
        return self._instance_paths

    @property
    def instance_names(self: InstanceSet) -> list[str]:
        """Get processed instance names for instances."""
        return self._instance_names

    @property
    def instances(self: InstanceSet) -> list[str]:
        """Get the instance paths as strings with the file suffix removed."""
        return [str(p.with_suffix("")) for p in self._instance_paths]

    @property
    def name(self: InstanceSet) -> str:
        """Get instance set name (the final component of the directory)."""
        return self.directory.name

    def __str__(self: InstanceSet) -> str:
        """Get the string representation of an Instance Set."""
        return self.name

    def __repr__(self: InstanceSet) -> str:
        """Get detailed representation of an Instance Set."""
        return f"{self.name}:\n"\
               f"\t- Type: {type(self).__name__}\n"\
               f"\t- Directory: {self.directory}\n"\
               f"\t- # Instances: {self.size}"

    def get_path_by_name(self: InstanceSet, name: str) -> Path | list[Path] | None:
        """Retrieve an instance path by its name.

        Args:
            name: The instance name to look up.

        Returns:
            The entry of the instance paths stored at the same index as the
            first matching name, or None if the name is not in the set.
        """
        try:
            idx = self._instance_names.index(name)
        except ValueError:
            # Unknown name: callers get None rather than an exception.
            return None
        return self._instance_paths[idx]
class FileInstanceSet(InstanceSet):
    """Instance set in which every instance is exactly one file."""

    def __init__(self: FileInstanceSet, target: Path) -> None:
        """Build the set from a single file or from a directory.

        Args:
            target: Either one instance file, or a directory. For a file the
                set holds just that file and ``directory`` becomes its parent;
                otherwise every entry yielded by iterating the directory is
                taken as one instance.
        """
        super().__init__(target)
        # The set name is derived from the original target, also when
        # `directory` is re-pointed to the parent below.
        self._name: str = target.stem

        if not target.is_file():
            # Directory case: one instance per directory entry.
            self._instance_paths = list(self.directory.iterdir())
            self._instance_names = [entry.stem for entry in self._instance_paths]
            return

        # Single-file case: the set contains only the target itself.
        self._instance_paths = [target]
        self._instance_names = [target.stem]
        self.directory = target.parent

    @property
    def name(self: FileInstanceSet) -> str:
        """Get instance set name (the stem of the target given at creation)."""
        return self._name
class MultiFileInstanceSet(InstanceSet):
    """Object representation of a set of multi-file instances.

    The instances are described by a CSV file (``instance_csv``) located in the
    instance directory. The first column of each row is the instance name, the
    remaining columns are the files of that instance, relative to the directory.
    """

    instance_csv = "instances.csv"

    def __init__(self: MultiFileInstanceSet, target: Path) -> None:
        """Initialise an Instances object from a directory.

        Args:
            target: Path to the instances directory. Will read from instances.csv.
                If target is not a directory, its parent is used as the
                directory and only the CSV rows that contain ``target.stem``
                as one of their fields are kept.
        """
        target_is_dir = target.is_dir()
        super().__init__(target if target_is_dir else target.parent)
        # The CSV file describing the instances of this directory
        self.instance_file = self.directory / MultiFileInstanceSet.instance_csv

        # Read inside a context manager so the handle is always closed;
        # newline="" is what the csv module asks for on file objects.
        # Blank lines come out of csv.reader as empty lists and are dropped,
        # as they carry no instance name.
        with self.instance_file.open(newline="") as csv_file:
            rows = [row for row in csv.reader(csv_file) if row]

        if not target_is_dir:
            # Single instance requested: keep only the rows mentioning it
            rows = [row for row in rows if target.stem in row]

        for instance_name, *instance_files in rows:
            self._instance_names.append(instance_name)
            # Each instance is stored as the list of its files
            self._instance_paths.append(
                [self.directory / f for f in instance_files])

    @property
    def all_paths(self: MultiFileInstanceSet) -> list[Path]:
        """Returns all file paths in the instance set as a flat list.

        The path of the CSV file itself is appended as the last element.
        """
        return [p for instance in self._instance_paths for p in instance] + \
            [self.instance_file]

    @property
    def instances(self: MultiFileInstanceSet) -> list[Path]:
        """Get instance names with relative path for multi-file instances.

        Returns:
            One Path per instance: the directory joined with the instance name.
        """
        return [self.directory / inst_name for inst_name in self.instance_names]
139class IterableFileInstanceSet(InstanceSet):
140 """Object representation of files containing multiple instances."""
141 supported_filetypes = set([".csv", ".npy"])
143 def __init__(self: IterableFileInstanceSet, target: Path) -> None:
144 """Initialise an InstanceSet from a single file.
146 Args:
147 target: Path to the instances directory. If multiple files are found,
148 they are assumed to have the same number of instances.
149 """
150 super().__init__(target)
151 self._instance_paths =\
152 [p for p in self.directory.iterdir()
153 if p.suffix in IterableFileInstanceSet.supported_filetypes]
154 self._size = IterableFileInstanceSet.__determine_size__(self._instance_paths[0])
155 self._instance_names = [p.name for p in self._instance_paths]
157 @property
158 def size(self: IterableFileInstanceSet) -> int:
159 """Returns the number of instances in the set."""
160 return self._size
162 @staticmethod
163 def __determine_size__(file: Path) -> int:
164 """Determine the number of instances in a file."""
165 match file.suffix:
166 case ".csv":
167 return len(file.open().readlines())
168 case ".npy":
169 return len(np.load(file))