Coverage for sparkle/CLI/run_portfolio_selector.py: 89%

76 statements  

coverage.py v7.6.10, created at 2025-01-07 15:22 +0000

#!/usr/bin/env python3
"""Sparkle command to execute a portfolio selector."""

import sys
import argparse
from pathlib import PurePath, Path

import runrunner as rrr
from runrunner import Runner

from sparkle.CLI.help import global_variables as gv
from sparkle.CLI.help import logging as sl
from sparkle.platform.settings_objects import Settings, SettingState
from sparkle.CLI.help import argparse_custom as ac
from sparkle.structures import PerformanceDataFrame, FeatureDataFrame
from sparkle.CLI.help.reporting_scenario import Scenario
from sparkle.CLI.initialise import check_for_initialise
from sparkle.CLI.help.nicknames import resolve_object_name
from sparkle.instance import Instance_Set
from sparkle.CLI.compute_features import compute_features

def parser_function() -> argparse.ArgumentParser:
    """Define the command line arguments."""
    parser = argparse.ArgumentParser(
        description="Run a portfolio selector on instance (set), determine which solver "
                    "is most likely to perform well and run it on the instance (set).")
    parser.add_argument(*ac.InstancePathPositional.names,
                        **ac.InstancePathPositional.kwargs)
    parser.add_argument(*ac.RunOnArgument.names,
                        **ac.RunOnArgument.kwargs)
    parser.add_argument(*ac.SettingsFileArgument.names,
                        **ac.SettingsFileArgument.kwargs)
    return parser

def main(argv: list[str]) -> None:
    """Main function of the run portfolio selector command."""
    # Log command call
    sl.log_command(sys.argv)
    check_for_initialise()

    # Define command line arguments
    parser = parser_function()

    # Process command line arguments
    args = parser.parse_args(argv)

    if ac.set_by_user(args, "settings_file"):
        gv.settings().read_settings_ini(
            args.settings_file, SettingState.CMD_LINE
        )  # Do first, so other command line options can override settings from the file
    if args.run_on is not None:
        gv.settings().set_run_on(args.run_on.value, SettingState.CMD_LINE)

    # Compare current settings to latest.ini
    prev_settings = Settings(PurePath("Settings/latest.ini"))
    Settings.check_settings_changes(gv.settings(), prev_settings)

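    # Resolve the instance set from the given path or nickname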
    data_set = resolve_object_name(
        args.instance_path,
        gv.file_storage_data_mapping[gv.instances_nickname_path],
        gv.settings().DEFAULT_instance_dir, Instance_Set)

    if data_set is None:
        print("ERROR: The instance (set) could not be found. Please make sure the "
              "path is correct.")
        sys.exit(-1)

    run_on = gv.settings().get_run_on()

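    # The selector built by the latest selection scenario must exist, and at
    # least one feature extractor must be present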
    selector_scenario = gv.latest_scenario().get_selection_scenario_path()
    selector_path = selector_scenario / "portfolio_selector"
    if not selector_path.exists() or not selector_path.is_file():
        print("ERROR: The portfolio selector could not be found. Please make sure to "
              "first construct a portfolio selector.")
        sys.exit(-1)
    if len([p for p in gv.settings().DEFAULT_extractor_dir.iterdir()]) == 0:
        print("ERROR: No feature extractor added to Sparkle.")
        sys.exit(-1)

    # Compute the features of the incoming instances
    test_case_path = selector_scenario / data_set.name
    test_case_path.mkdir(exist_ok=True)
    feature_dataframe = FeatureDataFrame(gv.settings().DEFAULT_feature_data_path)
    feature_dataframe.remove_instances(feature_dataframe.instances)
    feature_dataframe.csv_filepath = test_case_path / "feature_data.csv"
    feature_dataframe.add_instances(data_set.instance_paths)
    feature_dataframe.save_csv()
    feature_run = compute_features(feature_dataframe, recompute=False, run_on=run_on)

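    # When running locally, block until feature computation has finished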
    if run_on == Runner.LOCAL:
        feature_run.wait()
    objectives = gv.settings().get_general_sparkle_objectives()
    # Prepare performance data
    performance_data = PerformanceDataFrame(
        test_case_path / "performance_data.csv",
        objectives=objectives)
    for instance_name in data_set.instance_names:
        if instance_name not in performance_data.instances:
            performance_data.add_instance(instance_name)
    performance_data.add_solver(selector_path.name)
    performance_data.save_csv()
    # Update latest scenario
    gv.latest_scenario().set_selection_test_case_directory(test_case_path)
    gv.latest_scenario().set_latest_scenario(Scenario.SELECTION)
    # Write used scenario to file
    gv.latest_scenario().write_scenario_ini()

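    # Build one call to the selector core script per instance in the set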
    run_core = Path(__file__).parent.parent.resolve() /\
        "CLI" / "core" / "run_portfolio_selector_core.py"
    cmd_list = [f"python3 {run_core} "
                f"--selector {selector_path} "
                f"--feature-data-csv {feature_dataframe.csv_filepath} "
                f"--performance-data-csv {performance_data.csv_filepath} "
                f"--instance {instance_path} "
                f"--log-dir {sl.caller_log_dir}"
                for instance_path in data_set.instance_paths]

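    # Queue the selector runs; on Slurm they depend on the feature extraction jobs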
    selector_run = rrr.add_to_queue(
        runner=run_on,
        cmd=cmd_list,
        name=f"Portfolio Selector: {selector_path.name} on {data_set.name}",
        base_dir=sl.caller_log_dir,
        stdout=None,
        dependencies=feature_run if run_on == Runner.SLURM else None,
        sbatch_options=gv.settings().get_slurm_extra_options(as_args=True))

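    # Local runs block until the selector is done; otherwise the jobs keep running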
    if run_on == Runner.LOCAL:
        selector_run.wait()
        print("Running Sparkle portfolio selector done!")
    else:
        print("Sparkle portfolio selector is running ...")

    # Write used settings to file
    gv.settings().write_used_settings()
    sys.exit(0)


if __name__ == "__main__":
    main(sys.argv[1:])