Source code for sparkle.instance.instances

"""Objects and methods relating to instances for Sparkle."""
from __future__ import annotations
from pathlib import Path

import csv
import numpy as np


class InstanceSet:
    """Base object representation of a set of instances."""

    def __init__(self: InstanceSet, target: Path | list[str, Path]) -> None:
        """Create an empty instance set bound to the given target.

        Subclasses are expected to fill in the instance names and paths.

        Args:
            target: The Path, or list of paths to create the instance set from.
        """
        self.directory: Path = target
        # Names and paths are index-aligned: entry i of one belongs to
        # entry i of the other.
        self._instance_names: list[str] = []
        self._instance_paths: list[Path] = []

    @property
    def size(self: InstanceSet) -> int:
        """Number of instances currently stored in the set."""
        return len(self._instance_paths)

    @property
    def all_paths(self: InstanceSet) -> list[Path]:
        """All file paths in the instance set as a flat list."""
        return self._instance_paths

    @property
    def instance_paths(self: InstanceSet) -> list[Path]:
        """The stored instance paths."""
        return self._instance_paths

    @property
    def instance_names(self: InstanceSet) -> list[str]:
        """The stored instance names."""
        return self._instance_names

    @property
    def instances(self: InstanceSet) -> list[str]:
        """Instance paths as strings, with the file suffix stripped."""
        without_suffix = (path.with_suffix("") for path in self._instance_paths)
        return [str(path) for path in without_suffix]

    @property
    def name(self: InstanceSet) -> str:
        """Name of the instance set, taken from its directory."""
        return self.directory.name

    def __str__(self: InstanceSet) -> str:
        """Return the instance set name as its string representation."""
        return self.name

    def __repr__(self: InstanceSet) -> str:
        """Return a multi-line summary: name, type, directory and size."""
        kind = type(self).__name__
        return (
            f"{self.name}:\n"
            f"\t- Type: {kind}\n"
            f"\t- Directory: {self.directory}\n"
            f"\t- # Instances: {self.size}"
        )

    def get_path_by_name(self: InstanceSet, name: str) -> Path | list[Path]:
        """Look up the path stored for the first instance called ``name``.

        Args:
            name: The instance name to search for.

        Returns:
            The path entry at the same index as the first matching name, or
            None when no instance carries that name.
        """
        try:
            position = self._instance_names.index(name)
        except ValueError:
            # Unknown name: signal failure with None rather than raising.
            return None
        return self._instance_paths[position]
class FileInstanceSet(InstanceSet):
    """Object representation of a set of single-file instances."""

    def __init__(self: FileInstanceSet, target: Path) -> None:
        """Initialise an InstanceSet, where each instance is a single file.

        Args:
            target: Either a path to one instance file, or a path to a
                directory in which every entry is treated as one instance.
        """
        super().__init__(target)
        # The set name is fixed from the original target, before the
        # directory is possibly redirected to the parent below.
        self._name: str = target.stem

        if not target.is_file():
            # Default situation, treat each file in the directory as an instance
            self._instance_paths = list(self.directory.iterdir())
            self._instance_names = [entry.stem for entry in self._instance_paths]
            return

        # Single instance set: the file is the only instance and its parent
        # folder becomes the set directory.
        self._instance_paths = [target]
        self._instance_names = [target.stem]
        self.directory = target.parent

    @property
    def name(self: FileInstanceSet) -> str:
        """Name of the instance set (the stem of the original target)."""
        return self._name
class MultiFileInstanceSet(InstanceSet):
    """Object representation of a set of multi-file instances."""

    instance_csv = "instances.csv"

    def __init__(self: MultiFileInstanceSet, target: Path) -> None:
        """Initialise an Instances object from a directory.

        Each row of the instances.csv file in the directory describes one
        instance: the first cell is the instance name, the remaining cells
        are file names relative to the directory.

        Args:
            target: Path to the instances directory, or to something inside
                it. If target is not a directory, its parent is used as the
                instances directory and only rows containing a cell equal to
                ``target.stem`` are loaded.

        Raises:
            OSError: If instances.csv cannot be opened (e.g. it is missing).
        """
        target_is_dir = target.is_dir()
        target_dir = target if target_is_dir else target.parent
        super().__init__(target_dir)
        # A path pointing to the csv file listing the instances
        self.instance_file = self.directory / MultiFileInstanceSet.instance_csv

        # Read within a context manager so the handle is closed again;
        # newline="" is what the csv module asks for on file objects.
        # Blank lines come back as empty rows and are skipped, as they carry
        # no instance name.
        with self.instance_file.open(newline="") as csv_file:
            rows = [row for row in csv.reader(csv_file) if row]

        if not target_is_dir:
            # Single instance: keep only the rows mentioning the target
            rows = [row for row in rows if target.stem in row]

        for instance_name, *file_names in rows:
            self._instance_names.append(instance_name)
            # csv.reader yields plain strings, resolve them against the directory
            self._instance_paths.append(
                [self.directory / file_name for file_name in file_names])

    @property
    def all_paths(self: MultiFileInstanceSet) -> list[Path]:
        """Returns all file paths in the instance set as a flat list.

        The path of the instances.csv file itself is appended at the end.
        """
        flat_paths = [path for instance in self._instance_paths
                      for path in instance]
        return flat_paths + [self.instance_file]

    @property
    def instances(self: MultiFileInstanceSet) -> list[Path]:
        """Get instance names with relative path for multi-file instances.

        Returns:
            One Path per instance: the set directory joined with the name.
        """
        return [self.directory / inst_name for inst_name in self.instance_names]
[docs] class IterableFileInstanceSet(InstanceSet): """Object representation of files containing multiple instances.""" supported_filetypes = set([".csv", ".npy"]) def __init__(self: IterableFileInstanceSet, target: Path) -> None: """Initialise an InstanceSet from a single file. Args: target: Path to the instances directory. If multiple files are found, they are assumed to have the same number of instances. """ super().__init__(target) self._instance_paths =\ [p for p in self.directory.iterdir() if p.suffix in IterableFileInstanceSet.supported_filetypes] self._size = IterableFileInstanceSet.__determine_size__(self._instance_paths[0]) self._instance_names = [p.name for p in self._instance_paths] @property def size(self: IterableFileInstanceSet) -> int: """Returns the number of instances in the set.""" return self._size @staticmethod def __determine_size__(file: Path) -> int: """Determine the number of instances in a file.""" match file.suffix: case ".csv": return len(file.open().readlines()) case ".npy": return len(np.load(file))