Source code for sparkle.solver.selector

"""File to handle a Selector for selecting Solvers."""
from __future__ import annotations
from pathlib import Path

from sklearn.base import ClassifierMixin, RegressorMixin
from asf.cli import cli_train as asf_cli
from asf.scenario.scenario_metadata import ScenarioMetadata
from asf.predictors import AbstractPredictor
from asf.selectors.abstract_model_based_selector import AbstractModelBasedSelector

import runrunner as rrr
from runrunner import Runner, Run

from sparkle.types import SparkleObjective
from sparkle.structures import FeatureDataFrame, PerformanceDataFrame



[docs]
class Selector:
    """The Selector class for handling Algorithm Selection.

    Pairs a selector class with a model class. ``construct`` queues the
    training command for the pair and ``run`` loads a stored selector and asks
    it for a prediction on one instance.
    """

    def __init__(
            self: Selector,
            selector_class: AbstractModelBasedSelector,
            model_class: AbstractPredictor | ClassifierMixin | RegressorMixin) -> None:
        """Initialize the Selector object.

        Args:
            selector_class: The Selector class to construct.
            model_class: The model class the selector will use.
        """
        self.selector_class = selector_class
        self.model_class = model_class

    @property
    def name(self: Selector) -> str:
        """Return the name of the selector.

        The name is ``<selector class name>_<model class name>``.
        """
        return f"{self.selector_class.__name__}_{self.model_class.__name__}"

    def construct(self: Selector,
                  target_file: Path,
                  performance_data: PerformanceDataFrame,
                  feature_data: FeatureDataFrame,
                  objective: SparkleObjective,
                  solver_cutoff: int | float | str | None = None,
                  run_on: Runner = Runner.SLURM,
                  sbatch_options: list[str] | None = None,
                  slurm_prepend: str | list[str] | Path | None = None,
                  base_dir: Path = Path()) -> Run:
        """Construct the Selector.

        Writes a converted copy of the performance and feature data as csv
        files next to ``target_file``, builds the training command and submits
        it to the requested runner.

        Args:
            target_file: Path to the file to save the Selector to. The
                converted csv files are written to its parent directory.
            performance_data: The performance data to train on.
            feature_data: The feature data to train on.
            objective: The objective to optimize for selection.
            solver_cutoff: Passed as ``budget`` to the scenario metadata.
            run_on: Which runner to use. Defaults to slurm.
            sbatch_options: Additional options to pass to sbatch.
            slurm_prepend: Slurm script to prepend to the sbatch
            base_dir: The base directory to run the Selector in.

        Returns:
            The construction Run
        """
        # Convert the dataframes to Selector Format
        # Requires instances as index for both, columns as features / solvers
        # Remove redundant data
        performance_csv = performance_data.drop(
            [PerformanceDataFrame.column_seed,
             PerformanceDataFrame.column_configuration],
            axis=1, level=1).droplevel(level=1, axis=1)
        performance_csv = performance_csv.loc[objective.name]  # Select objective
        performance_csv.index = performance_csv.index.droplevel("Run")  # Drop runs
        performance_path = target_file.parent / performance_data.csv_filepath.name
        performance_csv.to_csv(performance_path)

        # Features requires instances as index, columns as feature names
        feature_csv = feature_data.dataframe.copy()
        feature_csv.index = feature_csv.index.map("_".join)  # Reduce Multi-Index
        feature_csv = feature_csv.T  # ASF has feature columns and instance rows
        feature_path = target_file.parent / feature_data.csv_filepath.name
        feature_csv.to_csv(feature_path)

        selector = self.selector_class(
            self.model_class, ScenarioMetadata(
                algorithms=performance_data.solvers,
                features=feature_csv.columns.to_list(),
                performance_metric=objective.name,
                maximize=not objective.minimise,
                budget=solver_cutoff
            )
        )

        cmd = asf_cli.build_cli_command(selector,
                                        feature_path,
                                        performance_path,
                                        target_file)

        # The command parts are joined into a single string entry
        cmd = [" ".join([str(c) for c in cmd])]
        construct = rrr.add_to_queue(
            runner=run_on,
            cmd=cmd,
            name=f"{self.name} Selector Construction: "
                 f"{', '.join([Path(s).name for s in performance_data.solvers])}",
            base_dir=base_dir,
            sbatch_options=sbatch_options,
            prepend=slurm_prepend)
        if run_on == Runner.LOCAL:
            # Local runs are awaited so the result file can be checked here
            construct.wait()
            if not target_file.is_file():
                print(f"Selector construction of {self.name} failed!")

        return construct

    def run(self: Selector,
            selector_path: Path,
            instance: str,
            feature_data: FeatureDataFrame) -> list | None:
        """Run the Selector, returning the prediction schedule upon success.

        Args:
            selector_path: Path to load the selector from.
            instance: The instance to predict for; used as column label in the
                feature data and as key in the predicted schedule.
            feature_data: The feature data containing the instance.

        Returns:
            The schedule entry for the instance, or None if the selector
            returned no prediction.
        """
        instance_features = feature_data.dataframe[[instance, ]]
        instance_features.index = instance_features.index.map("_".join)  # Reduce
        instance_features = instance_features.T  # ASF dataframe structure
        selector = self.selector_class.load(selector_path)
        schedule = selector.predict(instance_features)
        if schedule is None:
            print(f"ERROR: Selector {self.name} failed predict schedule!")
            # Subscripting None would raise a TypeError right after reporting
            return None
        return schedule[instance]