"""Parameter Configuration Space tools."""
from __future__ import annotations
import re
import ast
from enum import Enum
from pathlib import Path
import ConfigSpace.conditions
import tabulate
import ConfigSpace
from ConfigSpace import ConfigurationSpace
from sparkle.tools.configspace import expression_to_configspace
class PCSConvention(Enum):
"""Internal pcs convention enum."""
UNKNOWN = "UNKNOWN"
SMAC = "smac"
ParamILS = "paramils"
IRACE = "irace"
ConfigSpace = "configspace"
[docs]
class PCSConverter:
"""Parser class independent file of notation."""
section_regex = re.compile(r"\[(?P<name>[a-zA-Z]+?)\]\s*(?P<comment>#.*)?$")
illegal_param_name = re.compile(r"[!:\-+@!#$%^&*()=<>?/\|~` ]")
smac2_params_regex = re.compile(
r"^(?P<name>[a-zA-Z0-9_]+)\s+(?P<type>[a-zA-Z]+)\s+"
r"(?P<values>[a-zA-Z0-9\-\[\]{}_,. ]+)\s*"
r"\[(?P<default>[a-zA-Z0-9._-]+)\]?\s*"
r"(?P<scale>log)?\s*(?P<comment>#.*)?$"
)
smac2_conditions_regex = re.compile(
r"^(?P<parameter>[a-zA-Z0-9_]+)\s*\|\s*"
r"(?P<expression>.+)$"
)
smac2_forbidden_regex = re.compile(r"\{(?P<forbidden>.+)\}$")
paramils_params_regex = re.compile(
r"^(?P<name>[a-zA-Z0-9@!#:_-]+)\s*"
r"(?P<values>{[a-zA-Z0-9._+\-\, ]+})\s*"
r"\[(?P<default>[a-zA-Z0-9._\-+ ]+)\]?\s*"
r"(?P<comment>#.*)?$"
)
paramils_conditions_regex = re.compile(
r"^(?P<parameter>[a-zA-Z0-9@!#:_-]+)\s*\|\s*"
r"(?P<expression>.+)$"
)
irace_params_regex = re.compile(
r"^(?P<name>[a-zA-Z0-9_]+)\s+"
r"(?P<switch>[\"a-zA-Z0-9_\- ]+)\s+"
r"(?P<type>[cior])(?:,)?(?P<scale>log)?\s+"
r"(?P<values>[a-zA-Z0-9\-()_,. ]+)\s*"
r"(?:\|)?(?P<conditions>[a-zA-Z-0-9_!=<>\%()\&\|\. ]*)?\s*"
r"(?P<comment>#.*)?$"
)
[docs]
@staticmethod
def get_convention(file: Path) -> PCSConvention:
"""Determines the format of a pcs file."""
try:
ConfigSpace.ConfigurationSpace.from_yaml(file)
return PCSConvention.ConfigSpace
except Exception:
pass
try:
ConfigSpace.ConfigurationSpace.from_json(file)
return PCSConvention.ConfigSpace
except Exception:
pass
file_contents = file.open().readlines()
for line in file_contents:
if line.startswith("#"): # Comment line
continue
if "#" in line:
line, _ = line.split("#", maxsplit=1)
line = line.strip()
if re.match(PCSConverter.smac2_params_regex, line):
return PCSConvention.SMAC
elif re.match(PCSConverter.paramils_params_regex, line):
return PCSConvention.ParamILS
elif re.match(PCSConverter.irace_params_regex, line):
return PCSConvention.IRACE
return PCSConvention.UNKNOWN
[docs]
@staticmethod
def parse(file: Path, convention: PCSConvention = None) -> ConfigurationSpace:
"""Determines the format of a pcs file and parses into Configuration Space."""
if not convention:
convention = PCSConverter.get_convention(file)
if convention == PCSConvention.ConfigSpace:
if file.suffix == ".yaml":
return ConfigSpace.ConfigurationSpace.from_yaml(file)
if file.suffix == ".json":
return ConfigSpace.ConfigurationSpace.from_json(file)
if convention == PCSConvention.SMAC:
return PCSConverter.parse_smac(file)
if convention == PCSConvention.ParamILS:
return PCSConverter.parse_paramils(file)
if convention == PCSConvention.IRACE:
return PCSConverter.parse_irace(file)
raise Exception(
f"PCS convention not recognised based on any lines in file:\n{file}"
)
[docs]
@staticmethod
def parse_smac(content: list[str] | Path) -> ConfigurationSpace:
"""Parses a SMAC2 file."""
space_name = content.name if isinstance(content, Path) else None
content = content.open().readlines() if isinstance(content, Path) else content
cs = ConfigurationSpace(space_name)
for line in content:
if not line.strip() or line.startswith("#"): # Empty or comment
continue
comment = None
line = line.strip()
if re.match(PCSConverter.smac2_params_regex, line):
parameter = re.fullmatch(PCSConverter.smac2_params_regex, line)
name = parameter.group("name")
parameter_type = parameter.group("type")
values = parameter.group("values")
default = parameter.group("default")
comment = parameter.group("comment")
scale = parameter.group("scale")
if parameter_type == "integer":
values = ast.literal_eval(values)
csparam = ConfigSpace.UniformIntegerHyperparameter(
name=name,
lower=int(values[0]),
upper=int(values[-1]),
default_value=int(default),
log=scale == "log",
meta=comment,
)
elif parameter_type == "real":
values = ast.literal_eval(values)
csparam = ConfigSpace.UniformFloatHyperparameter(
name=name,
lower=float(values[0]),
upper=float(values[-1]),
default_value=float(default),
log=scale == "log",
meta=comment,
)
elif parameter_type == "categorical":
values = re.sub(r"[{}\s]+", "", values).split(",")
csparam = ConfigSpace.CategoricalHyperparameter(
name=name,
choices=values,
default_value=default,
meta=comment,
# Does not seem to contain any weights?
)
elif parameter_type == "ordinal":
values = re.sub(r"[{}\s]+", "", values).split(",")
csparam = ConfigSpace.OrdinalHyperparameter(
name=name,
sequence=values,
default_value=default,
meta=comment,
)
cs.add(csparam)
elif re.match(PCSConverter.smac2_conditions_regex, line):
# Break up the expression into the smallest possible pieces
match = re.fullmatch(PCSConverter.smac2_conditions_regex, line)
parameter, condition = (
match.group("parameter"),
match.group("expression"),
)
parameter = cs[parameter.strip()]
condition = condition.replace(" || ", " or ").replace(" && ", " and ")
condition = re.sub(r"(?<![<>!=])=(?<![=])", "==", condition)
condition = re.sub(r"!==", "!=", condition)
condition = expression_to_configspace(
condition, cs, target_parameter=parameter
)
cs.add(condition)
elif re.match(PCSConverter.smac2_forbidden_regex, line):
match = re.fullmatch(PCSConverter.smac2_forbidden_regex, line)
forbidden = match.group("forbidden")
# Forbidden expressions structure <expression> <operator> <value>
# where expressions can contain:
# Logical Operators: >=, <=, >, <, ==, !=,
# Logical clause operators: ( ), ||, &&,
# Supported by SMAC2 but not by ConfigSpace?:
# Arithmetic Operators: +, -, *, ^, %
# Functions: abs, acos, asin, atan, cbrt, ceil, cos, cosh, exp, floor,
# log, log10, log2, sin, sinh, sqrt, tan, tanh
# NOTE: According to MA & JR, these were never actually supported
rejected_operators = (
"+",
"-",
"*",
"^",
"%",
"abs",
"acos",
"asin",
"atan",
"cbrt",
"ceil",
"cos",
"cosh",
"exp",
"floor",
"log",
"log10",
"log2",
"sin",
"sinh",
"sqrt",
"tan",
"tanh",
)
if any([r in forbidden.split(" ") for r in rejected_operators]):
print(
"WARNING: Arithmetic operators are not supported by "
"ConfigurationSpace. Skipping forbidden expression:\n"
f"{forbidden}"
)
continue
forbidden = (
forbidden.replace(" && ", " and ")
.replace(", ", " and ")
.replace(" || ", " or ")
.strip()
) # To AST notation
forbidden = re.sub(r"(?<![<>!=])=(?![=])", "==", forbidden)
forbidden = expression_to_configspace(forbidden, cs)
cs.add(forbidden)
else:
raise Exception(f"SMAC2 PCS expression not recognised on line:\n{line}")
return cs
[docs]
@staticmethod
def parse_paramils(content: list[str] | Path) -> ConfigurationSpace:
"""Parses a paramils file."""
space_name = content.name if isinstance(content, Path) else None
content = content.open().readlines() if isinstance(content, Path) else content
cs = ConfigurationSpace(name=space_name)
conditions_lines = {}
for line in content:
line = line.strip()
if not line or line.startswith("#"): # Empty or comment
continue
comment = None
if re.match(PCSConverter.paramils_params_regex, line):
parameter = re.fullmatch(PCSConverter.paramils_params_regex, line)
name = parameter.group("name")
if re.match(PCSConverter.illegal_param_name, name):
# ParamILS is flexible to which parameters are allowed.
# We do not allow it as it creates many problems with parsing
# expressions
raise ValueError(
f"ParamILS parameter name not allowed: {name}. "
"This is supported by ParamILS, but not by PCSConverter."
)
values = parameter.group("values")
values = values.replace("..", ",") # Replace automatic expansion
try:
values = list(ast.literal_eval(values)) # Values are sets
values = sorted(values)
if any([isinstance(v, float) for v in values]):
parameter_type = float
else:
parameter_type = int
except Exception: # of strings (Categorical)
values = values.replace("{", "").replace("}", "").split(",")
parameter_type = str
if len(values) == 1: # Not allowed by ConfigSpace for int / float
values = [str(values[0])]
parameter_type = str
default = parameter.group("default")
comment = parameter.group("comment")
if parameter_type is int:
csparam = ConfigSpace.UniformIntegerHyperparameter(
name=name,
lower=int(values[0]),
upper=int(values[-1]),
default_value=int(default),
meta=comment,
)
elif parameter_type is float:
csparam = ConfigSpace.UniformFloatHyperparameter(
name=name,
lower=float(values[0]),
upper=float(values[-1]),
default_value=float(default),
meta=comment,
)
elif parameter_type is str:
csparam = ConfigSpace.CategoricalHyperparameter(
name=name,
choices=values,
default_value=default,
meta=comment,
)
cs.add(csparam)
elif re.match(PCSConverter.paramils_conditions_regex, line):
# Break up the expression into the smallest possible pieces
match = re.fullmatch(PCSConverter.paramils_conditions_regex, line)
parameter, condition = (
match.group("parameter").strip(),
match.group("expression"),
)
condition = condition.replace(" || ", " or ").replace(" && ", " and ")
condition = re.sub(r"(?<![<>!=])=(?<![=])", "==", condition)
condition = re.sub(r"!==", "!=", condition)
# ParamILS supports multiple lines of conditions for a single parameter
# so we collect, with the AND operator and parse + add them later
if parameter not in conditions_lines:
conditions_lines[parameter] = condition
else:
conditions_lines[parameter] += " and " + condition
elif re.match(PCSConverter.smac2_forbidden_regex, line):
match = re.fullmatch(PCSConverter.smac2_forbidden_regex, line)
forbidden = match.group("forbidden")
# Forbidden expressions structure <expression> <operator> <value>
# where expressions can contain:
# Logical Operators: >=, <=, >, <, ==, !=,
# Logical clause operators: ( ), ||, &&,
# Supported by SMAC2 but not by ConfigSpace?:
# Arithmetic Operators: +, -, *, ^, %
# Functions: abs, acos, asin, atan, cbrt, ceil, cos, cosh, exp, floor,
# log, log10, log2, sin, sinh, sqrt, tan, tanh
# NOTE: According to MA & JR, these were never actually supported
rejected_operators = (
"+",
"-",
"*",
"^",
"%",
"abs",
"acos",
"asin",
"atan",
"cbrt",
"ceil",
"cos",
"cosh",
"exp",
"floor",
"log",
"log10",
"log2",
"sin",
"sinh",
"sqrt",
"tan",
"tanh",
)
if any([r in forbidden.split(" ") for r in rejected_operators]):
print(
"WARNING: Arithmetic operators are not supported by "
"ConfigurationSpace. Skipping forbidden expression:\n"
f"{forbidden}"
)
continue
forbidden = (
forbidden.replace(" && ", " and ")
.replace(", ", " and ")
.replace(" || ", " or ")
.strip()
) # To AST notation
forbidden = re.sub(r"(?<![<>!=])=(?![=])", "==", forbidden)
forbidden = expression_to_configspace(forbidden, cs)
cs.add(forbidden)
else:
raise Exception(
f"ParamILS PCS expression not recognised on line: {line}"
)
# Add the condition
for pname, cond in conditions_lines.items(): # Add conditions
condition = expression_to_configspace(cond, cs, target_parameter=cs[pname])
cs.add(condition)
return cs
[docs]
@staticmethod
def parse_irace(content: list[str] | Path) -> ConfigurationSpace:
"""Parses a irace file."""
space_name = content.name if isinstance(content, Path) else None
content = content.open().readlines() if isinstance(content, Path) else content
cs = ConfigurationSpace(name=space_name)
standardised_conditions = []
forbidden_flag, global_flag = False, False
for line in content:
line = line.strip()
if not line or line.startswith("#"): # Empty or comment
continue
if re.match(PCSConverter.section_regex, line):
section = re.fullmatch(PCSConverter.section_regex, line)
if section.group("name") == "forbidden":
forbidden_flag, global_flag = True, False
continue
elif section.group("name") == "global":
global_flag, forbidden_flag = True, False
continue
else:
raise Exception(f"IRACE PCS section not recognised on line:\n{line}")
elif global_flag: # Parse global statements
continue # We do not parse global group
elif forbidden_flag: # Parse forbidden statements
# Parse the forbidden statement to standardised format
forbidden_expr = re.sub(r" \& ", " and ", line)
forbidden_expr = re.sub(r" \| ", " or ", forbidden_expr)
forbidden_expr = re.sub(r" \%in\% ", " in ", forbidden_expr)
forbidden_expr = re.sub(r" [co]\(", " (", forbidden_expr)
forbidden_expr = expression_to_configspace(forbidden_expr, cs)
cs.add(forbidden_expr)
elif re.match(PCSConverter.irace_params_regex, line):
parameter = re.fullmatch(PCSConverter.irace_params_regex, line)
name = parameter.group("name")
parameter_type = parameter.group("type")
# NOTE: IRACE supports depedent parameter domains, e.g. parameters which
# domain relies on another parameter: p2 "--p2" r ("p1", "p1 + 10")"
# and is limited to the operators: +,-, *, /, %%, min, max
values = ast.literal_eval(parameter.group("values"))
scale = parameter.group("scale")
conditions = parameter.group("conditions")
comment = parameter.group("comment")
# Convert categorical / ordinal values to strings
if parameter_type == "c" or parameter_type == "o":
values = [str(i) for i in values]
else:
if any(
operator in values
for operator in ["+", "-", "*", "/", "%", "min", "max"]
):
raise ValueError(
"Dependent parameter domains not supported by "
"ConfigurationSpace."
)
lower_bound, upper_bound = values[0], values[1]
if parameter_type == "c":
csparam = ConfigSpace.CategoricalHyperparameter(
name=name,
choices=values,
meta=comment,
)
elif parameter_type == "o":
csparam = ConfigSpace.OrdinalHyperparameter(
name=name,
sequence=values,
meta=comment,
)
elif parameter_type == "r":
csparam = ConfigSpace.UniformFloatHyperparameter(
name=name,
lower=float(lower_bound),
upper=float(upper_bound),
log=scale == "log",
meta=comment,
)
elif parameter_type == "i":
csparam = ConfigSpace.UniformIntegerHyperparameter(
name=name,
lower=int(lower_bound),
upper=int(upper_bound),
log=scale == "log",
meta=comment,
)
cs.add(csparam)
if conditions:
# Convert the expression to standardised format
conditions = re.sub(r" \& ", " and ", conditions)
conditions = re.sub(r" \| ", " or ", conditions)
conditions = re.sub(r" \%in\% ", " in ", conditions)
conditions = re.sub(r" [cior]\(", " (", conditions)
conditions = conditions.strip()
standardised_conditions.append((csparam, conditions))
else:
raise Exception(f"IRACE PCS expression not recognised on line:\n{line}")
# We can only add the conditions after all parameters have been parsed:
for csparam, conditions in standardised_conditions:
conditions = expression_to_configspace(
conditions, cs, target_parameter=csparam
)
cs.add(conditions)
return cs
[docs]
@staticmethod
def export(
configspace: ConfigurationSpace, pcs_format: PCSConvention, file: Path
) -> str | None:
"""Exports a config space object to a specific PCS convention.
Args:
configspace: ConfigurationSpace, the space to convert
pcs_format: PCSConvention, the convention to conver to
file: Path, the file to write to. If None, will return string.
Returns:
String in case of no file path given, otherwise None.
"""
# Create pcs table
declaration = (
f"### {pcs_format.name} Parameter Configuration Space file "
"generated by Sparkle\n"
)
rows = []
extra_rows = []
if pcs_format == PCSConvention.SMAC or pcs_format == PCSConvention.ParamILS:
import numpy as np
granularity = 20 # For ParamILS. TODO: Make it parametrisable
header = [
"# Parameter Name",
"type",
"values",
"default value",
"scale",
"comments",
]
parameter_map = {
ConfigSpace.UniformFloatHyperparameter: "real",
ConfigSpace.UniformIntegerHyperparameter: "integer",
ConfigSpace.CategoricalHyperparameter: "categorical",
ConfigSpace.OrdinalHyperparameter: "ordinal",
}
for parameter in list(configspace.values()):
log = False
if isinstance(
parameter, ConfigSpace.hyperparameters.NumericalHyperparameter
):
log = parameter.log
if pcs_format == PCSConvention.ParamILS: # Discretise
dtype = (
float
if isinstance(
parameter, ConfigSpace.UniformFloatHyperparameter
)
else int
)
if log:
lower = 1e-5 if parameter.lower == 0 else parameter.lower
domain = list(
np.unique(
np.geomspace(
lower,
parameter.upper,
granularity,
dtype=dtype,
)
)
)
else:
domain = list(
np.linspace(
parameter.lower,
parameter.upper,
granularity,
dtype=dtype,
)
)
if dtype(parameter.default_value) not in domain: # Add default
domain += [dtype(parameter.default_value)]
domain = list(set(domain)) # Ensure unique values only
domain.sort()
domain = "{" + ",".join([str(i) for i in domain]) + "}"
else: # SMAC2 takes ranges
domain = f"[{parameter.lower}, {parameter.upper}]"
else:
domain = "{" + ",".join(parameter.choices) + "}"
rows.append(
[
parameter.name,
parameter_map[type(parameter)]
if not pcs_format == PCSConvention.ParamILS
else "",
domain,
f"[{parameter.default_value}]",
"log"
if log and not pcs_format == PCSConvention.ParamILS
else "",
f"# {parameter.meta}",
]
)
if configspace.conditions:
extra_rows.extend(["", "# Parameter Conditions"])
for condition in configspace.conditions:
condition_str = str(condition)
condition_str = condition_str.replace("(", "").replace(
")", ""
) # Brackets not allowed
condition_str = condition_str.replace("'", "") # No quotes needed
condition_str = condition_str.replace(
f"{condition.child.name} | ", ""
).strip()
condition_str = f"{condition.child.name} | " + condition_str
if pcs_format == PCSConvention.ParamILS and re.search(
r"[<>!=]+|[<>]=|[!=]=", condition_str
):
# TODO: Translate condition ParamILS expression (in)
continue
extra_rows.append(condition_str)
if configspace.forbidden_clauses:
extra_rows.extend(["", "# Forbidden Expressions"])
for forbidden in configspace.forbidden_clauses:
forbidden_str = str(forbidden).replace("Forbidden: ", "")
forbidden_str = forbidden_str.replace("(", "{").replace(")", "}")
forbidden_str = forbidden_str.replace("'", "")
if pcs_format == PCSConvention.ParamILS and re.search(
r"[<>!=]+|[<>]=|[!=]=", forbidden_str
):
# TODO: Translate condition ParamILS expression (in)
continue
extra_rows.append(forbidden_str)
elif pcs_format == PCSConvention.IRACE:
digits = 4 # Number of digits after decimal point required
parameter_map = {
ConfigSpace.UniformFloatHyperparameter: "r",
ConfigSpace.UniformIntegerHyperparameter: "i",
ConfigSpace.CategoricalHyperparameter: "c",
ConfigSpace.OrdinalHyperparameter: "o",
}
header = [
"# Parameter Name",
"switch",
"type",
"values",
"[conditions (using R syntax)]",
"comments",
]
for parameter in list(configspace.values()):
parameter_conditions = []
for c in configspace.conditions:
if c.child == parameter:
parameter_conditions.append(c)
parameter_type = parameter_map[type(parameter)]
condition_type = (
parameter_type
if type(parameter) is ConfigSpace.CategoricalHyperparameter
else ""
)
condition_str = " || ".join([str(c) for c in parameter_conditions])
condition_str = condition_str.replace(f"{parameter.name} | ", "")
condition_str = condition_str.replace(" in ", f" %in% {condition_type}")
condition_str = condition_str.replace("{", "(").replace("}", ")")
condition_str = condition_str.replace("'", "") # No quotes around string
condition_str = condition_str.replace(" && ", " & ").replace(
" || ", " | "
)
if isinstance(
parameter, ConfigSpace.hyperparameters.NumericalHyperparameter
):
if parameter.log:
parameter_type += ",log"
domain = f"({parameter.lower}, {parameter.upper})"
if isinstance(
parameter, ConfigSpace.hyperparameters.FloatHyperparameter
):
# Format the floats to interpret the number of digits
# (Includes scientific notation)
lower, upper = (
format(parameter.lower, ".16f").strip("0"),
format(parameter.upper, ".16f").strip("0"),
)
param_digits = max(
len(str(lower).split(".")[1]),
len(str(upper).split(".")[1]),
)
# Check if we need to update the global digits
if param_digits > digits:
digits = param_digits
else:
domain = "(" + ",".join(parameter.choices) + ")"
rows.append(
[
parameter.name,
f'"--{parameter.name} "',
parameter_type,
domain, # Parameter range/domain
f"| {condition_str}" if condition_str else "",
f"# {parameter.meta}" if parameter.meta else "",
]
)
if configspace.forbidden_clauses:
extra_rows.extend(["", "[forbidden]"])
for forbidden_expression in configspace.forbidden_clauses:
forbidden_str = str(forbidden_expression).replace("Forbidden: ", "")
if " in " in forbidden_str:
type_char = parameter_map[
type(forbidden_expression.hyperparameter)
]
forbidden_str.replace(" in ", f" %in% {type_char}")
forbidden_str = forbidden_str.replace(" && ", " & ").replace(
" || ", " | "
)
extra_rows.append(forbidden_str)
if digits > 4: # Default digits is 4
extra_rows.extend(["", "[global]", f"digits={digits}"])
output = (
declaration
+ tabulate.tabulate(rows, headers=header, tablefmt="plain", numalign="left")
+ "\n"
)
if extra_rows:
output += "\n".join(extra_rows) + "\n"
if file is None:
return output
file.open("w+").write(output)
[docs]
@staticmethod
def validate(file_path: Path) -> bool:
"""Validate a pcs file."""
# TODO: Determine which format
# TODO: Verify each line, and the order in which they were written
return