Coverage for sparkle/CLI/construct_portfolio_selector.py: 72%

107 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2025-01-07 15:22 +0000

1#!/usr/bin/env python3 

2"""Sparkle command to construct a portfolio selector.""" 

3import sys 

4import argparse 

5from pathlib import Path 

6 

7import runrunner as rrr 

8from runrunner.base import Runner 

9 

10from sparkle.platform.settings_objects import SettingState 

11from sparkle.structures import PerformanceDataFrame, FeatureDataFrame 

12from sparkle.types import resolve_objective 

13from sparkle.CLI.help import global_variables as gv 

14from sparkle.CLI.help import logging as sl 

15from sparkle.CLI.help import argparse_custom as ac 

16from sparkle.CLI.help.reporting_scenario import Scenario 

17from sparkle.CLI.initialise import check_for_initialise 

18 

19 

def parser_function() -> argparse.ArgumentParser:
    """Define the command line arguments."""
    parser = argparse.ArgumentParser(
        description="Command to construct a portfolio selector over all known features "
                    "solver performances.")
    # Each Sparkle argument bundle carries its own flag names and kwargs;
    # register them all on the parser in one pass.
    arguments = (ac.RecomputePortfolioSelectorArgument,
                 ac.SelectorTimeoutArgument,
                 ac.ObjectiveArgument,
                 ac.SelectorAblationArgument,
                 ac.RunOnArgument,
                 ac.SettingsFileArgument)
    for argument in arguments:
        parser.add_argument(*argument.names, **argument.kwargs)
    return parser

38 

39 

def judge_exist_remaining_jobs(feature_data_csv: Path,
                               performance_data_csv: Path) -> None:
    """Check whether feature or performance computation jobs remain.

    Exits the program with status -1 when any unperformed jobs are found,
    since the portfolio selector cannot be constructed from incomplete data.
    Returns normally (None) when all jobs are done.

    Note: the original signature claimed ``-> bool`` but no path ever
    returned a value; the annotation now matches the actual behavior.

    Args:
        feature_data_csv: Path to the feature data file.
        performance_data_csv: Path to the performance data file.
    """
    feature_data = FeatureDataFrame(feature_data_csv)
    performance_data = PerformanceDataFrame(performance_data_csv)
    missing_features = feature_data.has_missing_vectors()
    # NOTE(review): accessed as a property here, unlike the method call
    # above — presumably mirrors the two data frame APIs; confirm.
    missing_performances = performance_data.has_missing_values
    if missing_features:
        print("There remain unperformed feature computation jobs!")
    if missing_performances:
        print("There remain unperformed performance computation jobs!")
    if missing_features or missing_performances:
        print("Please first execute all unperformed jobs before constructing Sparkle "
              "portfolio selector")
        print("Sparkle portfolio selector is not successfully constructed!")
        sys.exit(-1)

56 

57 

def main(argv: list[str]) -> None:
    """Main method of construct portfolio selector.

    Parses the command line, verifies that all feature/performance
    computation jobs are finished, constructs the portfolio selector
    (plus one ablated selector per solver when requested) and queues a
    marginal-contribution computation that depends on those runs.

    Args:
        argv: Command line arguments (excluding the program name).
    """
    # Log command call
    sl.log_command(sys.argv)
    check_for_initialise()

    # Define command line arguments
    parser = parser_function()

    # Process command line arguments
    args = parser.parse_args(argv)
    selector_timeout = args.selector_timeout
    flag_recompute_portfolio = args.recompute_portfolio_selector
    solver_ablation = args.solver_ablation

    if ac.set_by_user(args, "settings_file"):
        gv.settings().read_settings_ini(
            args.settings_file, SettingState.CMD_LINE
        )  # Do first, so other command line options can override settings from the file
    if ac.set_by_user(args, "objective"):
        objective = resolve_objective(args.objective)
    else:
        # No --objective given: fall back to the first objective configured
        # in the settings and warn the user about the implicit choice.
        objective = gv.settings().get_general_sparkle_objectives()[0]
        print("WARNING: No objective specified, defaulting to first objective from "
              f"settings ({objective}).")
    if args.run_on is not None:
        gv.settings().set_run_on(
            args.run_on.value, SettingState.CMD_LINE)
    run_on = gv.settings().get_run_on()

    print("Start constructing Sparkle portfolio selector ...")
    selector = gv.settings().get_general_sparkle_selector()

    # Exits with status -1 when unfinished feature/performance jobs remain.
    judge_exist_remaining_jobs(
        gv.settings().DEFAULT_feature_data_path,
        gv.settings().DEFAULT_performance_data_path)

    # Selector (AutoFolio) cannot handle cutoff time less than 2, adjust if needed
    cutoff_time = max(gv.settings().get_general_target_cutoff_time(), 2)

    performance_data = PerformanceDataFrame(gv.settings().DEFAULT_performance_data_path)
    feature_data = FeatureDataFrame(gv.settings().DEFAULT_feature_data_path)

    # NOTE(review): judge_exist_remaining_jobs() checks has_missing_vectors(),
    # while this calls has_missing_value() — confirm both exist on
    # FeatureDataFrame and that the different granularity is intentional.
    if feature_data.has_missing_value():
        print("WARNING: Missing values in the feature data, will be imputed as the mean "
              "value of all other non-missing values! Imputing all missing values...")
        feature_data.impute_missing_values()

    # TODO: Allow user to specify subsets of data to be used

    # Selector is named after the solvers it can predict, sort for permutation invariance
    solvers = sorted([s.name for s in gv.settings().DEFAULT_solver_dir.iterdir()])
    selection_scenario_path = (
        gv.settings().DEFAULT_selection_output
        / gv.settings().DEFAULT_general_sparkle_selector.name
        / "_".join(solvers))

    # Update latest scenario
    gv.latest_scenario().set_selection_scenario_path(selection_scenario_path)
    gv.latest_scenario().set_latest_scenario(Scenario.SELECTION)
    # Set to default to overwrite possible old path
    gv.latest_scenario().set_selection_test_case_directory()

    selector_path = selection_scenario_path / "portfolio_selector"
    sbatch_options = gv.settings().get_slurm_extra_options(as_args=True)
    # Skip (with exit code 0) if a selector already exists and no recompute flag.
    if selector_path.exists() and not flag_recompute_portfolio:
        print("Portfolio selector already exists. Set the recompute flag to re-create.")
        sys.exit()

    selector_path.parent.mkdir(exist_ok=True, parents=True)

    selector_run = selector.construct(selector_path,
                                      performance_data,
                                      feature_data,
                                      objective,
                                      cutoff_time,
                                      selector_timeout,
                                      run_on=run_on,
                                      sbatch_options=sbatch_options,
                                      base_dir=sl.caller_log_dir)
    if run_on == Runner.LOCAL:
        print("Sparkle portfolio selector constructed!")
    else:
        print("Sparkle portfolio selector constructor running...")

    dependencies = [selector_run]
    if solver_ablation:
        # Build one selector per left-out solver so its actual marginal
        # contribution can be measured later.
        for solver in performance_data.solvers:
            solver_name = Path(solver).name
            ablate_solver_dir = selection_scenario_path / f"ablate_{solver_name}"
            ablate_solver_selector = ablate_solver_dir / "portfolio_selector"
            if (ablate_solver_selector.exists() and not flag_recompute_portfolio):
                print(f"Portfolio selector without {solver_name} already exists. "
                      "Set the recompute flag to re-create.")
                continue
            ablate_solver_dir.mkdir(exist_ok=True, parents=True)
            # Clone before removing the solver so the full data set is kept intact.
            ablated_performance_data = performance_data.clone()
            ablated_performance_data.remove_solver(solver)
            ablated_run = selector.construct(ablate_solver_selector,
                                             ablated_performance_data,
                                             feature_data,
                                             objective,
                                             cutoff_time,
                                             selector_timeout,
                                             run_on=run_on,
                                             sbatch_options=sbatch_options,
                                             base_dir=sl.caller_log_dir)
            dependencies.append(ablated_run)
            if run_on == Runner.LOCAL:
                print(f"Portfolio selector without {solver_name} constructed!")
            else:
                print(f"Portfolio selector without {solver_name} constructor running...")

    # Compute the marginal contribution
    with_actual = "--actual" if solver_ablation else ""
    # NOTE(review): uses ObjectivesArgument (plural) while the parser above
    # registers ObjectiveArgument (singular) — presumably the downstream CLI
    # takes the plural flag; confirm against compute_marginal_contribution.
    cmd = (f"python3 sparkle/CLI/compute_marginal_contribution.py --perfect "
           f"{with_actual} {ac.ObjectivesArgument.names[0]} {objective}")
    solver_names = ", ".join([Path(s).name for s in performance_data.solvers])
    marginal_contribution = rrr.add_to_queue(
        runner=run_on,
        cmd=cmd,
        name=f"Marginal Contribution computation: {solver_names}",
        base_dir=sl.caller_log_dir,
        dependencies=dependencies,
        sbatch_options=sbatch_options)
    dependencies.append(marginal_contribution)
    if run_on == Runner.LOCAL:
        # Local runs are synchronous: block until the computation finishes.
        marginal_contribution.wait()
        print("Selector marginal contribution computing done!")
    else:
        print(f"Running selector construction. Waiting for Slurm job(s) with id(s): "
              f"{', '.join([d.run_id for d in dependencies])}")

    # Write used settings to file
    gv.settings().write_used_settings()
    # Write used scenario to file
    gv.latest_scenario().write_scenario_ini()
    sys.exit(0)

196 

197 

if __name__ == "__main__":
    # Script entry point: forward CLI arguments (excluding the program name).
    main(sys.argv[1:])