Coverage for sparkle/CLI/construct_portfolio_selector.py: 0%

99 statements  

« prev     ^ index     » next       coverage.py v7.6.1, created at 2024-09-27 09:10 +0000

1#!/usr/bin/env python3 

2"""Sparkle command to construct a portfolio selector.""" 

3import sys 

4import argparse 

5from pathlib import Path 

6 

7import runrunner as rrr 

8from runrunner.base import Runner 

9 

10from sparkle.CLI.help import global_variables as gv 

11from sparkle.structures import PerformanceDataFrame, FeatureDataFrame 

12from sparkle.CLI.help import logging as sl 

13from sparkle.platform.settings_objects import SettingState 

14from sparkle.CLI.help import argparse_custom as ac 

15from sparkle.CLI.help.reporting_scenario import Scenario 

16from sparkle.platform import CommandName, COMMAND_DEPENDENCIES 

17from sparkle.CLI.initialise import check_for_initialise 

18 

19 

def parser_function() -> argparse.ArgumentParser:
    """Define the command line arguments.

    Returns:
        The argument parser for the construct-portfolio-selector command.
    """
    parser = argparse.ArgumentParser()
    # Each entry is an argparse-argument container exposing .names / .kwargs;
    # registering them in a loop keeps the list easy to extend.
    command_arguments = (
        ac.RecomputePortfolioSelectorArgument,
        ac.RecomputeMarginalContributionForSelectorArgument,
        ac.SelectorTimeoutArgument,
        ac.SparkleObjectiveArgument,
        ac.SelectorAblationArgument,
        ac.RunOnArgument,
    )
    for argument in command_arguments:
        parser.add_argument(*argument.names, **argument.kwargs)
    return parser

def judge_exist_remaining_jobs(feature_data_csv: Path,
                               performance_data_csv: Path) -> bool:
    """Return whether there are remaining feature or performance computation jobs.

    Args:
        feature_data_csv: Path to the feature data file.
        performance_data_csv: Path to the performance data file.

    Returns:
        True iff the feature data has missing vectors or the performance
        data has missing values.
    """
    # Guard clause: if feature vectors are missing we can answer immediately
    # and never need to load the (potentially large) performance data.
    if FeatureDataFrame(feature_data_csv).has_missing_vectors():
        return True
    return PerformanceDataFrame(performance_data_csv).has_missing_values

if __name__ == "__main__":
    # Script entry point: builds a portfolio selector from the current
    # platform's performance/feature data, optionally builds per-solver
    # ablated selectors, then queues a marginal-contribution computation.

    # Log command call
    sl.log_command(sys.argv)

    # Define command line arguments
    parser = parser_function()

    # Process command line arguments
    args = parser.parse_args()
    selector_timeout = args.selector_timeout
    flag_recompute_portfolio = args.recompute_portfolio_selector
    # NOTE(review): this flag is read but never used below — the marginal
    # contribution job is always queued regardless. Confirm intent.
    flag_recompute_marg_cont = args.recompute_marginal_contribution
    solver_ablation = args.solver_ablation

    # Abort early if the platform has not been initialised with the
    # commands this one depends on.
    check_for_initialise(
        COMMAND_DEPENDENCIES[CommandName.CONSTRUCT_PORTFOLIO_SELECTOR]
    )

    # Command-line values override settings-file values for this run.
    if ac.set_by_user(args, "objectives"):
        gv.settings().set_general_sparkle_objectives(
            args.objectives, SettingState.CMD_LINE
        )
    if args.run_on is not None:
        gv.settings().set_run_on(
            args.run_on.value, SettingState.CMD_LINE)
    run_on = gv.settings().get_run_on()

    print("Start constructing Sparkle portfolio selector ...")
    selector = gv.settings().get_general_sparkle_selector()

    # Refuse to build from incomplete data: all feature and performance
    # computation jobs must have finished first.
    flag_judge_exist_remaining_jobs = judge_exist_remaining_jobs(
        gv.settings().DEFAULT_feature_data_path,
        gv.settings().DEFAULT_performance_data_path)

    if flag_judge_exist_remaining_jobs:
        print("There remain unperformed feature computation jobs or performance "
              "computation jobs!")
        print("Please first execute all unperformed jobs before constructing Sparkle "
              "portfolio selector")
        print("Sparkle portfolio selector is not successfully constructed!")
        sys.exit(-1)

    # Selector (AutoFolio) cannot handle cutoff time less than 2, adjust if needed
    cutoff_time = max(gv.settings().get_general_target_cutoff_time(), 2)

    # Determine the objective function (first configured objective is used)
    objective = gv.settings().get_general_sparkle_objectives()[0]

    performance_data = PerformanceDataFrame(gv.settings().DEFAULT_performance_data_path)
    feature_data = FeatureDataFrame(gv.settings().DEFAULT_feature_data_path)

    # NOTE(review): `judge_exist_remaining_jobs` checks
    # `has_missing_vectors()` while this checks `has_missing_value()` —
    # presumably vector-level vs cell-level missingness; confirm both
    # methods exist on FeatureDataFrame.
    if feature_data.has_missing_value():
        print("WARNING: Missing values in the feature data, will be imputed as the mean "
              "value of all other non-missing values! Imputing all missing values...")
        feature_data.impute_missing_values()

    # TODO: Allow user to specify subsets of data to be used

    # Selector is named after the solvers it can predict, sort for permutation invariance
    solvers = sorted([s.name for s in gv.settings().DEFAULT_solver_dir.iterdir()])
    selection_scenario_path = (
        gv.settings().DEFAULT_selection_output
        / gv.settings().DEFAULT_general_sparkle_selector.name
        / "_".join(solvers))

    # Update latest scenario
    gv.latest_scenario().set_selection_scenario_path(selection_scenario_path)
    gv.latest_scenario().set_latest_scenario(Scenario.SELECTION)
    # Set to default to overwrite possible old path
    gv.latest_scenario().set_selection_test_case_directory()

    selector_path = selection_scenario_path / "portfolio_selector"
    sbatch_options = gv.settings().get_slurm_extra_options(as_args=True)
    # Skip (successfully) if the selector exists and no recompute was requested.
    if selector_path.exists() and not flag_recompute_portfolio:
        print("Portfolio selector already exists. Set the recompute flag to re-create.")
        sys.exit()

    selector_path.parent.mkdir(exist_ok=True, parents=True)

    # Launch the main selector construction (locally or as a Slurm job,
    # depending on run_on).
    selector_run = selector.construct(selector_path,
                                      performance_data,
                                      feature_data,
                                      objective,
                                      cutoff_time,
                                      selector_timeout,
                                      run_on=run_on,
                                      sbatch_options=sbatch_options,
                                      base_dir=sl.caller_log_dir)
    if run_on == Runner.LOCAL:
        print("Sparkle portfolio selector constructed!")
    else:
        print("Sparkle portfolio selector constructor running...")

    dependencies = [selector_run]
    if solver_ablation:
        # Build one "ablated" selector per solver: the same construction
        # with that solver's performance data removed, used later to
        # measure each solver's actual marginal contribution.
        for solver in performance_data.solvers:
            solver_name = Path(solver).name
            ablate_solver_dir = selection_scenario_path / f"ablate_{solver_name}"
            ablate_solver_selector = ablate_solver_dir / "portfolio_selector"
            if (ablate_solver_selector.exists() and not flag_recompute_portfolio):
                print(f"Portfolio selector without {solver_name} already exists. "
                      "Set the recompute flag to re-create.")
                continue
            ablate_solver_dir.mkdir(exist_ok=True, parents=True)
            # Copy so removing the solver does not mutate the shared frame.
            ablated_performance_data = performance_data.copy()
            ablated_performance_data.remove_solver(solver)
            ablated_run = selector.construct(ablate_solver_selector,
                                             ablated_performance_data,
                                             feature_data,
                                             objective,
                                             cutoff_time,
                                             selector_timeout,
                                             run_on=run_on,
                                             sbatch_options=sbatch_options,
                                             base_dir=sl.caller_log_dir)
            dependencies.append(ablated_run)
            if run_on == Runner.LOCAL:
                print(f"Portfolio selector without {solver_name} constructed!")
            else:
                print(f"Portfolio selector without {solver_name} constructor running...")
    # Queue the marginal-contribution computation after all selector
    # constructions finish; --actual is only meaningful when the ablated
    # selectors were built.
    with_actual = "--actual" if solver_ablation else ""
    cmd = (f"sparkle/CLI/compute_marginal_contribution.py --perfect {with_actual} "
           f"{ac.SparkleObjectiveArgument.names[0]} {objective}")

    marginal_contribution = rrr.add_to_queue(
        runner=run_on,
        cmd=cmd,
        name=CommandName.COMPUTE_MARGINAL_CONTRIBUTION,
        base_dir=sl.caller_log_dir,
        dependencies=dependencies,
        sbatch_options=sbatch_options)
    dependencies.append(marginal_contribution)
    if run_on == Runner.LOCAL:
        # Local runs block until the queued job completes.
        marginal_contribution.wait()
        print("Selector marginal contribution computing done!")
    else:
        print(f"Running selector construction. Waiting for Slurm job(s) with id(s): "
              f"{', '.join([d.run_id for d in dependencies])}")

    # Write used settings to file
    gv.settings().write_used_settings()
    # Write used scenario to file
    gv.latest_scenario().write_scenario_ini()