Coverage for sparkle/CLI/construct_portfolio_selector.py: 71%

103 statements  

coverage.py v7.6.4, created at 2024-11-05 14:48 +0000

#!/usr/bin/env python3
"""Sparkle command to construct a portfolio selector."""
import sys
import argparse
from pathlib import Path

import runrunner as rrr
from runrunner.base import Runner

from sparkle.CLI.help import global_variables as gv
from sparkle.structures import PerformanceDataFrame, FeatureDataFrame
from sparkle.CLI.help import logging as sl
from sparkle.platform.settings_objects import SettingState
from sparkle.CLI.help import argparse_custom as ac
from sparkle.CLI.help.reporting_scenario import Scenario
from sparkle.platform import CommandName, COMMAND_DEPENDENCIES
from sparkle.CLI.initialise import check_for_initialise


def parser_function() -> argparse.ArgumentParser:
    """Define the command line arguments."""
    parser = argparse.ArgumentParser(
        description="Command to construct a portfolio selector over all known "
                    "features and solver performances.")
    parser.add_argument(*ac.RecomputePortfolioSelectorArgument.names,
                        **ac.RecomputePortfolioSelectorArgument.kwargs)
    parser.add_argument(*ac.SelectorTimeoutArgument.names,
                        **ac.SelectorTimeoutArgument.kwargs)
    parser.add_argument(*ac.SparkleObjectiveArgument.names,
                        **ac.SparkleObjectiveArgument.kwargs)
    parser.add_argument(*ac.SelectorAblationArgument.names,
                        **ac.SelectorAblationArgument.kwargs)
    parser.add_argument(*ac.RunOnArgument.names,
                        **ac.RunOnArgument.kwargs)
    parser.add_argument(*ac.SettingsFileArgument.names,
                        **ac.SettingsFileArgument.kwargs)
    return parser


def judge_exist_remaining_jobs(feature_data_csv: Path,
                               performance_data_csv: Path) -> bool:
    """Return whether there are remaining feature or performance computation jobs."""
    feature_data = FeatureDataFrame(feature_data_csv)
    if feature_data.has_missing_vectors():
        return True
    performance_data = PerformanceDataFrame(performance_data_csv)
    return performance_data.has_missing_values


def main(argv: list[str]) -> None:
    """Main method of construct portfolio selector."""
    # Log command call
    sl.log_command(sys.argv)

    # Define command line arguments
    parser = parser_function()

    # Process command line arguments
    args = parser.parse_args(argv)
    selector_timeout = args.selector_timeout
    flag_recompute_portfolio = args.recompute_portfolio_selector
    solver_ablation = args.solver_ablation

    check_for_initialise(
        COMMAND_DEPENDENCIES[CommandName.CONSTRUCT_PORTFOLIO_SELECTOR]
    )
    if ac.set_by_user(args, "settings_file"):
        gv.settings().read_settings_ini(
            args.settings_file, SettingState.CMD_LINE
        )  # Do this first, so other command line options can override settings from the file
    if ac.set_by_user(args, "objectives"):
        gv.settings().set_general_sparkle_objectives(
            args.objectives, SettingState.CMD_LINE
        )
    if args.run_on is not None:
        gv.settings().set_run_on(
            args.run_on.value, SettingState.CMD_LINE)
    run_on = gv.settings().get_run_on()

    print("Start constructing Sparkle portfolio selector ...")
    selector = gv.settings().get_general_sparkle_selector()

    flag_judge_exist_remaining_jobs = judge_exist_remaining_jobs(
        gv.settings().DEFAULT_feature_data_path,
        gv.settings().DEFAULT_performance_data_path)

    if flag_judge_exist_remaining_jobs:
        print("There remain unperformed feature computation jobs or performance "
              "computation jobs!")
        print("Please first execute all unperformed jobs before constructing the "
              "Sparkle portfolio selector.")
        print("The Sparkle portfolio selector was not successfully constructed!")
        sys.exit(-1)

    # Selector (AutoFolio) cannot handle a cutoff time less than 2, adjust if needed
    cutoff_time = max(gv.settings().get_general_target_cutoff_time(), 2)

    # Determine the objective function
    objective = gv.settings().get_general_sparkle_objectives()[0]

    performance_data = PerformanceDataFrame(gv.settings().DEFAULT_performance_data_path)
    feature_data = FeatureDataFrame(gv.settings().DEFAULT_feature_data_path)

    if feature_data.has_missing_value():
        print("WARNING: Missing values in the feature data will be imputed as the mean "
              "value of all other non-missing values! Imputing all missing values...")
        feature_data.impute_missing_values()

    # TODO: Allow user to specify subsets of data to be used

    # Selector is named after the solvers it can predict, sort for permutation invariance
    solvers = sorted([s.name for s in gv.settings().DEFAULT_solver_dir.iterdir()])
    selection_scenario_path = (
        gv.settings().DEFAULT_selection_output
        / gv.settings().DEFAULT_general_sparkle_selector.name
        / "_".join(solvers))

    # Update latest scenario
    gv.latest_scenario().set_selection_scenario_path(selection_scenario_path)
    gv.latest_scenario().set_latest_scenario(Scenario.SELECTION)
    # Set to default to overwrite possible old path
    gv.latest_scenario().set_selection_test_case_directory()

    selector_path = selection_scenario_path / "portfolio_selector"
    sbatch_options = gv.settings().get_slurm_extra_options(as_args=True)
    if selector_path.exists() and not flag_recompute_portfolio:
        print("Portfolio selector already exists. Set the recompute flag to re-create.")
        sys.exit()

    selector_path.parent.mkdir(exist_ok=True, parents=True)

    selector_run = selector.construct(selector_path,
                                      performance_data,
                                      feature_data,
                                      objective,
                                      cutoff_time,
                                      selector_timeout,
                                      run_on=run_on,
                                      sbatch_options=sbatch_options,
                                      base_dir=sl.caller_log_dir)
    if run_on == Runner.LOCAL:
        print("Sparkle portfolio selector constructed!")
    else:
        print("Sparkle portfolio selector constructor running...")

    dependencies = [selector_run]
    if solver_ablation:
        # Build ablated selectors: each one is trained with one solver left out
        for solver in performance_data.solvers:
            solver_name = Path(solver).name
            ablate_solver_dir = selection_scenario_path / f"ablate_{solver_name}"
            ablate_solver_selector = ablate_solver_dir / "portfolio_selector"
            if ablate_solver_selector.exists() and not flag_recompute_portfolio:
                print(f"Portfolio selector without {solver_name} already exists. "
                      "Set the recompute flag to re-create.")
                continue
            ablate_solver_dir.mkdir(exist_ok=True, parents=True)
            ablated_performance_data = performance_data.copy()
            ablated_performance_data.remove_solver(solver)
            ablated_run = selector.construct(ablate_solver_selector,
                                             ablated_performance_data,
                                             feature_data,
                                             objective,
                                             cutoff_time,
                                             selector_timeout,
                                             run_on=run_on,
                                             sbatch_options=sbatch_options,
                                             base_dir=sl.caller_log_dir)
            dependencies.append(ablated_run)
            if run_on == Runner.LOCAL:
                print(f"Portfolio selector without {solver_name} constructed!")
            else:
                print(f"Portfolio selector without {solver_name} constructor running...")

    # Compute the marginal contribution
    with_actual = "--actual" if solver_ablation else ""
    cmd = (f"sparkle/CLI/compute_marginal_contribution.py --perfect {with_actual} "
           f"{ac.SparkleObjectiveArgument.names[0]} {objective}")
    marginal_contribution = rrr.add_to_queue(
        runner=run_on,
        cmd=cmd,
        name=CommandName.COMPUTE_MARGINAL_CONTRIBUTION,
        base_dir=sl.caller_log_dir,
        dependencies=dependencies,
        sbatch_options=sbatch_options)
    dependencies.append(marginal_contribution)
    if run_on == Runner.LOCAL:
        marginal_contribution.wait()
        print("Selector marginal contribution computing done!")
    else:
        print("Running selector construction. Waiting for Slurm job(s) with id(s): "
              f"{', '.join([d.run_id for d in dependencies])}")

    # Write used settings to file
    gv.settings().write_used_settings()
    # Write used scenario to file
    gv.latest_scenario().write_scenario_ini()
    sys.exit(0)


if __name__ == "__main__":
    main(sys.argv[1:])
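
For reference, a minimal sketch of reusing the pre-flight check above outside of main(). It assumes an initialised Sparkle platform (so the default data paths below resolve) and that the module is importable under the path shown in the report header; the printed message is illustrative only, not output produced by this command.

# Sketch: check for outstanding feature/performance computation jobs before
# attempting selector construction, reusing the platform's default data paths.
from sparkle.CLI.help import global_variables as gv
from sparkle.CLI.construct_portfolio_selector import judge_exist_remaining_jobs

if judge_exist_remaining_jobs(gv.settings().DEFAULT_feature_data_path,
                              gv.settings().DEFAULT_performance_data_path):
    print("Outstanding jobs found; finish them before constructing the selector.")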