 from csle_common.dao.training.tabular_policy import TabularPolicy
 from csle_common.metastore.metastore_facade import MetastoreFacade
 
-if __name__ == '__main__':
-    emulation_name = "csle-level9-040"
+if __name__ == "__main__":
+    emulation_name = "csle-level1-050"
     emulation_env_config = MetastoreFacade.get_emulation_by_name(emulation_name)
     if emulation_env_config is None:
-        raise ValueError(f"Could not find an emulation environment with the name: {emulation_name}")
+        raise ValueError(
+            f"Could not find an emulation environment with the name: {emulation_name}"
+        )
     simulation_name = "csle-intrusion-response-game-local-pomdp-defender-001"
     simulation_env_config = MetastoreFacade.get_simulation_by_name(simulation_name)
     if simulation_env_config is None:
         raise ValueError(f"Could not find a simulation with name: {simulation_name}")
     experiment_config = ExperimentConfig(
         output_dir=f"{constants.LOGGING.DEFAULT_LOG_DIR}dqn_clean_test",
-        title="DQN_clean test", random_seeds=[399, 98912, 999], agent_type=AgentType.DQN_CLEAN,
+        title="DQN_clean test",
+        random_seeds=[399, 98912, 999],
+        agent_type=AgentType.DQN_CLEAN,
         log_every=1000,
         hparams={
             constants.NEURAL_NETWORKS.NUM_NEURONS_PER_HIDDEN_LAYER: HParam(
-                value=7, name=constants.NEURAL_NETWORKS.NUM_NEURONS_PER_HIDDEN_LAYER,
-                descr="neurons per hidden layer of the policy network"),
+                value=7,
+                name=constants.NEURAL_NETWORKS.NUM_NEURONS_PER_HIDDEN_LAYER,
+                descr="neurons per hidden layer of the policy network",
+            ),
             constants.NEURAL_NETWORKS.NUM_HIDDEN_LAYERS: HParam(
-                value=4, name=constants.NEURAL_NETWORKS.NUM_HIDDEN_LAYERS,
-                descr="number of layers of the policy network"),
+                value=4,
+                name=constants.NEURAL_NETWORKS.NUM_HIDDEN_LAYERS,
+                descr="number of layers of the policy network",
+            ),
             agents_constants.DQN_CLEAN.EXP_FRAC: HParam(
-                value=0.5, name=agents_constants.DQN_CLEAN.EXP_FRAC,
-                descr="the fraction of `total-timesteps it takes from start-e to go end-e"),
+                value=0.5,
+                name=agents_constants.DQN_CLEAN.EXP_FRAC,
+                descr="the fraction of `total-timesteps` it takes to go from start-e to end-e",
+            ),
             agents_constants.DQN_CLEAN.TAU: HParam(
-                value=1.0, name=agents_constants.DQN_CLEAN.TAU, descr="target network update rate"),
+                value=1.0,
+                name=agents_constants.DQN_CLEAN.TAU,
+                descr="target network update rate",
+            ),
             agents_constants.COMMON.BATCH_SIZE: HParam(
-                value=64, name=agents_constants.COMMON.BATCH_SIZE, descr="batch size for updates"),
+                value=64,
+                name=agents_constants.COMMON.BATCH_SIZE,
+                descr="batch size for updates",
+            ),
             agents_constants.DQN_CLEAN.LEARNING_STARTS: HParam(
-                value=10000, name=agents_constants.DQN_CLEAN.LEARNING_STARTS, descr="timestep to start learning"),
+                value=10000,
+                name=agents_constants.DQN_CLEAN.LEARNING_STARTS,
+                descr="timestep to start learning",
+            ),
             agents_constants.DQN_CLEAN.TRAIN_FREQ: HParam(
-                value=10, name=agents_constants.DQN_CLEAN.TRAIN_FREQ, descr="the frequency of training"),
+                value=10,
+                name=agents_constants.DQN_CLEAN.TRAIN_FREQ,
+                descr="the frequency of training",
+            ),
             agents_constants.DQN_CLEAN.T_N_FREQ: HParam(
-                value=500, name=agents_constants.DQN_CLEAN.T_N_FREQ,
-                descr="the batch size of sample from the reply memory"),
+                value=500,
+                name=agents_constants.DQN_CLEAN.T_N_FREQ,
+                descr="the frequency of target network updates",
+            ),
             agents_constants.DQN_CLEAN.BUFFER_SIZE: HParam(
-                value=1000, name=agents_constants.DQN_CLEAN.BUFFER_SIZE, descr="the replay memory buffer size"),
+                value=1000,
+                name=agents_constants.DQN_CLEAN.BUFFER_SIZE,
+                descr="the replay memory buffer size",
+            ),
             agents_constants.DQN_CLEAN.SAVE_MODEL: HParam(
-                value=False, name=agents_constants.DQN_CLEAN.SAVE_MODEL, descr="decision param for model saving"),
+                value=False,
+                name=agents_constants.DQN_CLEAN.SAVE_MODEL,
+                descr="decision param for model saving",
+            ),
             agents_constants.COMMON.LEARNING_RATE: HParam(
-                value=2.4e-5, name=agents_constants.COMMON.LEARNING_RATE,
-                descr="learning rate for updating the policy"),
+                value=2.4e-5,
+                name=agents_constants.COMMON.LEARNING_RATE,
+                descr="learning rate for updating the policy",
+            ),
             agents_constants.DQN_CLEAN.NUM_STEPS: HParam(
-                value=164, name=agents_constants.DQN_CLEAN.NUM_STEPS, descr="number of steps in each time step"),
+                value=164,
+                name=agents_constants.DQN_CLEAN.NUM_STEPS,
+                descr="number of steps in each time step",
+            ),
             constants.NEURAL_NETWORKS.DEVICE: HParam(
-                value="cpu", name=constants.NEURAL_NETWORKS.DEVICE, descr="the device to train on (cpu or cuda:x)"),
+                value="cpu",
+                name=constants.NEURAL_NETWORKS.DEVICE,
+                descr="the device to train on (cpu or cuda:x)",
+            ),
             agents_constants.COMMON.NUM_PARALLEL_ENVS: HParam(
-                value=1, name=agents_constants.COMMON.NUM_PARALLEL_ENVS,
-                descr="the nunmber of parallel environments for training"),
+                value=1,
+                name=agents_constants.COMMON.NUM_PARALLEL_ENVS,
+                descr="the number of parallel environments for training",
+            ),
             agents_constants.COMMON.GAMMA: HParam(
-                value=0.99, name=agents_constants.COMMON.GAMMA, descr="the discount factor"),
+                value=0.99,
+                name=agents_constants.COMMON.GAMMA,
+                descr="the discount factor",
+            ),
             agents_constants.COMMON.NUM_TRAINING_TIMESTEPS: HParam(
-                value=int(100000), name=agents_constants.COMMON.NUM_TRAINING_TIMESTEPS,
-                descr="number of timesteps to train"),
-            agents_constants.COMMON.EVAL_EVERY: HParam(value=1, name=agents_constants.COMMON.EVAL_EVERY,
-                                                       descr="training iterations between evaluations"),
-            agents_constants.COMMON.EVAL_BATCH_SIZE: HParam(value=100, name=agents_constants.COMMON.EVAL_BATCH_SIZE,
-                                                            descr="the batch size for evaluation"),
-            agents_constants.COMMON.SAVE_EVERY: HParam(value=10000, name=agents_constants.COMMON.SAVE_EVERY,
-                                                       descr="how frequently to save the model"),
+                value=int(100000),
+                name=agents_constants.COMMON.NUM_TRAINING_TIMESTEPS,
+                descr="number of timesteps to train",
+            ),
+            agents_constants.COMMON.EVAL_EVERY: HParam(
+                value=1,
+                name=agents_constants.COMMON.EVAL_EVERY,
+                descr="training iterations between evaluations",
+            ),
+            agents_constants.COMMON.EVAL_BATCH_SIZE: HParam(
+                value=100,
+                name=agents_constants.COMMON.EVAL_BATCH_SIZE,
+                descr="the batch size for evaluation",
+            ),
+            agents_constants.COMMON.SAVE_EVERY: HParam(
+                value=10000,
+                name=agents_constants.COMMON.SAVE_EVERY,
+                descr="how frequently to save the model",
+            ),
             agents_constants.COMMON.CONFIDENCE_INTERVAL: HParam(
-                value=0.95, name=agents_constants.COMMON.CONFIDENCE_INTERVAL,
-                descr="confidence interval"),
+                value=0.95,
+                name=agents_constants.COMMON.CONFIDENCE_INTERVAL,
+                descr="confidence interval",
+            ),
             agents_constants.COMMON.MAX_ENV_STEPS: HParam(
-                value=500, name=agents_constants.COMMON.MAX_ENV_STEPS,
-                descr="maximum number of steps in the environment (for envs with infinite horizon generally)"),
+                value=500,
+                name=agents_constants.COMMON.MAX_ENV_STEPS,
+                descr="maximum number of steps in the environment (for envs with infinite horizon generally)",
+            ),
             agents_constants.COMMON.RUNNING_AVERAGE: HParam(
-                value=100, name=agents_constants.COMMON.RUNNING_AVERAGE,
-                descr="the number of samples to include when computing the running avg"),
-            agents_constants.COMMON.L: HParam(value=3, name=agents_constants.COMMON.L,
-                                              descr="the number of stop actions")
+                value=100,
+                name=agents_constants.COMMON.RUNNING_AVERAGE,
+                descr="the number of samples to include when computing the running avg",
+            ),
+            agents_constants.COMMON.L: HParam(
+                value=3,
+                name=agents_constants.COMMON.L,
+                descr="the number of stop actions",
+            ),
         },
-        player_type=PlayerType.DEFENDER, player_idx=0
+        player_type=PlayerType.DEFENDER,
+        player_idx=0,
     )
     simulation_env_config.simulation_env_input_config.attacker_strategy = TabularPolicy(
         player_type=PlayerType.ATTACKER,
-        actions=simulation_env_config.joint_action_space_config.action_spaces[1].actions,
-        simulation_name=simulation_env_config.name, value_function=None, q_table=None,
-        lookup_table=[
-            [0.8, 0.2],
-            [1, 0],
-            [1, 0]
-        ],
-        agent_type=AgentType.RANDOM, avg_R=-1)
-    agent = DQNCleanAgent(simulation_env_config=simulation_env_config, emulation_env_config=emulation_env_config,
-                          experiment_config=experiment_config, save_to_metastore=False)
+        actions=simulation_env_config.joint_action_space_config.action_spaces[
+            1
+        ].actions,
+        simulation_name=simulation_env_config.name,
+        value_function=None,
+        q_table=None,
+        lookup_table=[[0.8, 0.2], [1, 0], [1, 0]],
+        agent_type=AgentType.RANDOM,
+        avg_R=-1,
+    )
+    agent = DQNCleanAgent(
+        simulation_env_config=simulation_env_config,
+        emulation_env_config=emulation_env_config,
+        experiment_config=experiment_config,
+        save_to_metastore=False,
+    )
     experiment_execution = agent.train()
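The lookup_table passed to TabularPolicy above appears to hold one action-probability row per attacker state: row 0 = [0.8, 0.2] would mean the static attacker plays action 0 with probability 0.8 and action 1 with probability 0.2 in state 0, while rows 1 and 2 always play action 0. The sketch below only illustrates that interpretation; sample_action is a hypothetical helper for this example and not part of the csle API, which samples from the policy internally.

import random


def sample_action(lookup_table, state):
    # Hypothetical helper (not part of csle): draw an action index for the
    # given state from that state's probability row.
    probs = lookup_table[state]
    return random.choices(range(len(probs)), weights=probs, k=1)[0]


if __name__ == "__main__":
    table = [[0.8, 0.2], [1, 0], [1, 0]]
    # State 0: action 0 w.p. 0.8, action 1 w.p. 0.2; states 1 and 2: always action 0.
    print([sample_action(table, s) for s in range(len(table))])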