Coverage for sparkle/instance/instances.py: 86%
79 statements
« prev ^ index » next coverage.py v7.6.4, created at 2024-11-05 14:48 +0000
« prev ^ index » next coverage.py v7.6.4, created at 2024-11-05 14:48 +0000
1"""Objects and methods relating to instances for Sparkle."""
2from __future__ import annotations
3from pathlib import Path
5import csv
6import numpy as np
class InstanceSet:
    """Base object representation of a set of instances.

    Stores the target the set was created from, plus two parallel lists:
    instance names and the path(s) belonging to each of those instances.
    Both lists start empty; subclasses are expected to populate them.
    """

    def __init__(self: InstanceSet, target: Path | list[str | Path]) -> None:
        """Initialise an empty instance set for the given target.

        Args:
            target: The Path, or list of a name followed by paths, to create
                the instance set from. It is stored as-is in ``directory``.
        """
        # Stored verbatim: this is a list when the set was created from one.
        self.directory: Path | list[str | Path] = target
        self._instance_names: list[str] = []
        self._instance_paths: list[Path] = []

    @property
    def size(self: InstanceSet) -> int:
        """Returns the number of instances in the set."""
        return len(self._instance_paths)

    @property
    def all_paths(self: InstanceSet) -> list[Path]:
        """Returns all file paths in the instance set as a flat list."""
        return self._instance_paths

    @property
    def instance_paths(self: InstanceSet) -> list[Path]:
        """Get processed instance paths."""
        return self._instance_paths

    @property
    def instance_names(self: InstanceSet) -> list[str]:
        """Get processed instance names for multi-file instances."""
        return self._instance_names

    @property
    def name(self: InstanceSet) -> str:
        """Get instance set name.

        Reads ``directory.name``, so ``directory`` must be a Path here.
        """
        return self.directory.name

    def get_path_by_name(self: InstanceSet, name: str) -> Path | list[Path] | None:
        """Retrieve the path(s) of the first instance with the given name.

        Args:
            name: The instance name to look up.

        Returns:
            The entry of the instance paths stored at the same position as the
            first matching name, or None if no instance has that name.
        """
        try:
            position = self._instance_names.index(name)
        except ValueError:
            # Unknown name: a miss is reported as None rather than an error.
            return None
        return self._instance_paths[position]
class FileInstanceSet(InstanceSet):
    """Instance set in which every instance is exactly one file."""

    def __init__(self: FileInstanceSet, target: Path) -> None:
        """Build the set from a single file or from a directory.

        Args:
            target: Either a file, which becomes the only instance of the set,
                or a directory, in which case every entry yielded by
                ``iterdir()`` is registered as one instance.
        """
        super().__init__(target)
        # The set is named after the directory, or after the file minus suffix.
        target_is_dir = target.is_dir()
        self._name: str = target.name if target_is_dir else target.stem
        if target.is_file():
            # A lone file: it is the one instance, its parent is the directory.
            self.directory: Path = target.parent
            self._instance_paths = [target]
            self._instance_names = [target.name]
        else:
            # Anything else is handled as a directory of instances.
            self.directory = target
            entries = list(target.iterdir())
            self._instance_paths = entries
            self._instance_names = [entry.name for entry in entries]

    @property
    def name(self: FileInstanceSet) -> str:
        """Return the name determined for this set at construction time."""
        return self._name
class MultiFileInstanceSet(InstanceSet):
    """Object representation of a set of multi-file instances."""
    instance_csv = "instances.csv"

    def __init__(
            self: MultiFileInstanceSet, target: Path | list[str | Path]) -> None:
        """Initialise an Instances object from a directory or a single instance.

        Args:
            target: Path to the instances directory; the instances are read
                from the ``instances.csv`` file inside it, one instance per
                row as ``name, file1, file2, ...``.
                If target is a list of ``[name, path1, path2, ...]``, create
                an instance set containing only that one instance.

        Raises:
            TypeError: If target is neither a Path nor a list.
        """
        super().__init__(target)
        if isinstance(target, list):
            # A single instance represented as [name, path1, path2, ...].
            # Wrap it so the loop below treats it as one row; iterating the
            # bare list would process the name and each path as separate rows.
            instance_list = [target]
            target = target[1].parent
        elif isinstance(target, Path):
            # A path pointing to the directory of instances
            instance_file = target / MultiFileInstanceSet.instance_csv
            # newline="" is what the csv module expects of its input file;
            # the context manager closes the handle once the rows are read.
            with instance_file.open(newline="") as csv_file:
                # Blank lines are parsed as empty rows and carry no instance.
                instance_list = [row for row in csv.reader(csv_file) if row]
        else:
            raise TypeError(
                "target must be a Path or a list of [name, path, ...], "
                f"not {type(target).__name__}")

        self.directory = target if target.is_dir() else target.parent
        self._instance_names, self._instance_paths = [], []
        for instance in instance_list:
            self._instance_names.append(instance[0])
            # File names given as strings are resolved against the directory;
            # anything else is kept exactly as it was passed in.
            self._instance_paths.append([
                (self.directory / f) if isinstance(f, str) else f
                for f in instance[1:]])

    @property
    def all_paths(self: MultiFileInstanceSet) -> list[Path]:
        """Returns all file paths in the instance set as a flat list.

        The path of the instances csv file in the directory is appended last.
        """
        return [p for instance in self._instance_paths for p in instance] + [
            self.directory / MultiFileInstanceSet.instance_csv]
120class IterableFileInstanceSet(InstanceSet):
121 """Object representation of files containing multiple instances."""
122 supported_filetypes = set([".csv", ".npy"])
124 def __init__(self: IterableFileInstanceSet, target: Path) -> None:
125 """Initialise an InstanceSet from a single file.
127 Args:
128 target: Path to the instances directory. If multiple files are found,
129 they are assumed to have the same number of instances.
130 """
131 super().__init__(target)
132 self.directory = target
133 self._instance_paths =\
134 [p for p in self.directory.iterdir()
135 if p.suffix in IterableFileInstanceSet.supported_filetypes]
136 self._size = len(self._instance_paths[0].open().readlines())
137 self._instance_names = [p.name for p in self._instance_paths]
139 @property
140 def size(self: IterableFileInstanceSet) -> int:
141 """Returns the number of instances in the set."""
142 return self._size
144 @staticmethod
145 def __determine_size__(file: Path) -> int:
146 """Determine the number of instances in a file."""
147 match file.suffix:
148 case ".csv":
149 return len(file.open().readlines())
150 case ".npy":
151 return len(np.load(file))