Coverage for sparkle/instance/instances.py: 78%
81 statements
« prev ^ index » next coverage.py v7.6.10, created at 2025-01-07 15:22 +0000
1"""Objects and methods relating to instances for Sparkle."""
2from __future__ import annotations
3from pathlib import Path
5import csv
6import numpy as np
class InstanceSet:
    """Base object representation of a set of instances.

    The base class only stores the target and two parallel lists (instance
    names and instance paths), both initialised empty. The properties and
    lookup method below read from those lists.
    """

    def __init__(self: InstanceSet, target: Path | list[str | Path]) -> None:
        """Initialise an Instances object from a directory.

        Args:
            target: The Path, or list of paths to create the instance set from.
        """
        # The target is stored as given; no file system access happens here.
        self.directory: Path = target
        # Parallel lists: _instance_names[i] is the name of _instance_paths[i].
        self._instance_names: list[str] = []
        self._instance_paths: list[Path] = []

    @property
    def size(self: InstanceSet) -> int:
        """Returns the number of instances in the set."""
        return len(self._instance_paths)

    @property
    def all_paths(self: InstanceSet) -> list[Path]:
        """Returns all file paths in the instance set as a flat list."""
        return self._instance_paths

    @property
    def instance_paths(self: InstanceSet) -> list[Path]:
        """Get processed instance paths."""
        return self._instance_paths

    @property
    def instance_names(self: InstanceSet) -> list[str]:
        """Get processed instance names for multi-file instances."""
        return self._instance_names

    @property
    def name(self: InstanceSet) -> str:
        """Get instance set name (the final component of ``directory``)."""
        return self.directory.name

    def __str__(self: InstanceSet) -> str:
        """Get the string representation of an Instance Set (its name)."""
        return self.name

    def get_path_by_name(
            self: InstanceSet, name: str) -> Path | list[Path] | None:
        """Retrieve an instance path entry by its name.

        Args:
            name: The instance name to look up.

        Returns:
            The path entry stored for the first instance whose name equals
            ``name``, or None when no instance carries that name.
        """
        for instance_name, instance_path in zip(
                self._instance_names, self._instance_paths):
            if instance_name == name:
                return instance_path
        return None
class FileInstanceSet(InstanceSet):
    """Instance set in which every instance is exactly one file."""

    def __init__(self: FileInstanceSet, target: Path) -> None:
        """Build the set from a directory, or from one single file.

        Args:
            target: Either a directory, in which case every entry in it
                becomes an instance, or a single file, which then is the
                only instance of the set.
        """
        super().__init__(target)
        self.directory: Path = target

        # Set name: directory name, or the file name without its suffix.
        if target.is_dir():
            self._name: str = target.name
        else:
            self._name = target.stem

        if not self.directory.is_file():
            # Default situation: each entry of the directory is an instance.
            entries = list(self.directory.iterdir())
            self._instance_paths = entries
            self._instance_names = [entry.name for entry in entries]
            return

        # Single instance set: the file is the sole instance and its parent
        # folder becomes the directory of the set.
        single_file = self.directory
        self._instance_paths = [single_file]
        self._instance_names = [single_file.name]
        self.directory = single_file.parent

    @property
    def name(self: FileInstanceSet) -> str:
        """Get instance set name."""
        return self._name
class MultiFileInstanceSet(InstanceSet):
    """Object representation of a set of multi-file instances."""
    # File name of the CSV listing the instances, looked up in the directory.
    instance_csv = "instances.csv"

    def __init__(self: MultiFileInstanceSet,
                 target: Path | list[str | Path]) -> None:
        """Initialise an Instances object from a directory or a single record.

        Args:
            target: Path to the instances directory; the instances are read
                from the ``instances.csv`` file inside it, one instance per
                row as ``name, file1, file2, ...``. If target is a list of
                [name, path1, path2, ...], create an instance set of one.

        Raises:
            TypeError: If target is neither a list nor a Path.
        """
        super().__init__(target)
        if isinstance(target, list):
            # A single instance represented as a list of [name, path1, path2, ...].
            # Wrap it so the loop below sees exactly one record instead of
            # iterating over the individual elements of that record.
            instance_list = [target]
            target = target[1].parent
        elif isinstance(target, Path):
            # A path pointing to the directory of instances
            instance_file = target / MultiFileInstanceSet.instance_csv
            # newline="" is what the csv module expects for its input files;
            # the context manager closes the handle after reading.
            with instance_file.open(newline="") as csv_file:
                # Blank lines yield empty rows, which carry no instance.
                instance_list = [row for row in csv.reader(csv_file) if row]
        else:
            raise TypeError(
                "target must be a Path or a list of [name, path, ...], "
                f"got {type(target).__name__}")

        self.directory = target if target.is_dir() else target.parent
        self._instance_names, self._instance_paths = [], []
        for instance in instance_list:
            self._instance_names.append(instance[0])
            # String entries (e.g. read from the CSV) are joined onto the set
            # directory; anything else is stored unchanged.
            self._instance_paths.append([
                (self.directory / f) if isinstance(f, str) else f
                for f in instance[1:]])

    @property
    def all_paths(self: MultiFileInstanceSet) -> list[Path]:
        """Returns all file paths in the instance set as a flat list.

        The path of the instances CSV in the set directory is appended as
        the last element.
        """
        return [p for instance in self._instance_paths for p in instance] + [
            self.directory / MultiFileInstanceSet.instance_csv]
124class IterableFileInstanceSet(InstanceSet):
125 """Object representation of files containing multiple instances."""
126 supported_filetypes = set([".csv", ".npy"])
128 def __init__(self: IterableFileInstanceSet, target: Path) -> None:
129 """Initialise an InstanceSet from a single file.
131 Args:
132 target: Path to the instances directory. If multiple files are found,
133 they are assumed to have the same number of instances.
134 """
135 super().__init__(target)
136 self.directory = target
137 self._instance_paths =\
138 [p for p in self.directory.iterdir()
139 if p.suffix in IterableFileInstanceSet.supported_filetypes]
140 self._size = len(self._instance_paths[0].open().readlines())
141 self._instance_names = [p.name for p in self._instance_paths]
143 @property
144 def size(self: IterableFileInstanceSet) -> int:
145 """Returns the number of instances in the set."""
146 return self._size
148 @staticmethod
149 def __determine_size__(file: Path) -> int:
150 """Determine the number of instances in a file."""
151 match file.suffix:
152 case ".csv":
153 return len(file.open().readlines())
154 case ".npy":
155 return len(np.load(file))