Commit 45721bd

Merge pull request #353 from Limmen/dev
Multilevel Coordinate Search (MCS)
2 parents f858728 + f4a58e3 commit 45721bd

File tree

18 files changed: +4787 −108 lines

dev (whitespace-only changes)

examples/training/dqn_clean/stopping_pomdp_defender/run_vs_random_attacker_v_001.py

+122 −52
@@ -8,89 +8,159 @@
 from csle_common.dao.training.tabular_policy import TabularPolicy
 from csle_common.metastore.metastore_facade import MetastoreFacade
 
-if __name__ == '__main__':
-    emulation_name = "csle-level9-040"
+if __name__ == "__main__":
+    emulation_name = "csle-level1-050"
     emulation_env_config = MetastoreFacade.get_emulation_by_name(emulation_name)
     if emulation_env_config is None:
-        raise ValueError(f"Could not find an emulation environment with the name: {emulation_name}")
+        raise ValueError(
+            f"Could not find an emulation environment with the name: {emulation_name}"
+        )
     simulation_name = "csle-intrusion-response-game-local-pomdp-defender-001"
     simulation_env_config = MetastoreFacade.get_simulation_by_name(simulation_name)
     if simulation_env_config is None:
         raise ValueError(f"Could not find a simulation with name: {simulation_name}")
     experiment_config = ExperimentConfig(
         output_dir=f"{constants.LOGGING.DEFAULT_LOG_DIR}dqn_clean_test",
-        title="DQN_clean test", random_seeds=[399, 98912, 999], agent_type=AgentType.DQN_CLEAN,
+        title="DQN_clean test",
+        random_seeds=[399, 98912, 999],
+        agent_type=AgentType.DQN_CLEAN,
         log_every=1000,
         hparams={
             constants.NEURAL_NETWORKS.NUM_NEURONS_PER_HIDDEN_LAYER: HParam(
-                value=7, name=constants.NEURAL_NETWORKS.NUM_NEURONS_PER_HIDDEN_LAYER,
-                descr="neurons per hidden layer of the policy network"),
+                value=7,
+                name=constants.NEURAL_NETWORKS.NUM_NEURONS_PER_HIDDEN_LAYER,
+                descr="neurons per hidden layer of the policy network",
+            ),
             constants.NEURAL_NETWORKS.NUM_HIDDEN_LAYERS: HParam(
-                value=4, name=constants.NEURAL_NETWORKS.NUM_HIDDEN_LAYERS,
-                descr="number of layers of the policy network"),
+                value=4,
+                name=constants.NEURAL_NETWORKS.NUM_HIDDEN_LAYERS,
+                descr="number of layers of the policy network",
+            ),
             agents_constants.DQN_CLEAN.EXP_FRAC: HParam(
-                value=0.5, name=agents_constants.DQN_CLEAN.EXP_FRAC,
-                descr="the fraction of `total-timesteps it takes from start-e to go end-e"),
+                value=0.5,
+                name=agents_constants.DQN_CLEAN.EXP_FRAC,
+                descr="the fraction of `total-timesteps it takes from start-e to go end-e",
+            ),
             agents_constants.DQN_CLEAN.TAU: HParam(
-                value=1.0, name=agents_constants.DQN_CLEAN.TAU, descr="target network update rate"),
+                value=1.0,
+                name=agents_constants.DQN_CLEAN.TAU,
+                descr="target network update rate",
+            ),
             agents_constants.COMMON.BATCH_SIZE: HParam(
-                value=64, name=agents_constants.COMMON.BATCH_SIZE, descr="batch size for updates"),
+                value=64,
+                name=agents_constants.COMMON.BATCH_SIZE,
+                descr="batch size for updates",
+            ),
             agents_constants.DQN_CLEAN.LEARNING_STARTS: HParam(
-                value=10000, name=agents_constants.DQN_CLEAN.LEARNING_STARTS, descr="timestep to start learning"),
+                value=10000,
+                name=agents_constants.DQN_CLEAN.LEARNING_STARTS,
+                descr="timestep to start learning",
+            ),
             agents_constants.DQN_CLEAN.TRAIN_FREQ: HParam(
-                value=10, name=agents_constants.DQN_CLEAN.TRAIN_FREQ, descr="the frequency of training"),
+                value=10,
+                name=agents_constants.DQN_CLEAN.TRAIN_FREQ,
+                descr="the frequency of training",
+            ),
             agents_constants.DQN_CLEAN.T_N_FREQ: HParam(
-                value=500, name=agents_constants.DQN_CLEAN.T_N_FREQ,
-                descr="the batch size of sample from the reply memory"),
+                value=500,
+                name=agents_constants.DQN_CLEAN.T_N_FREQ,
+                descr="the batch size of sample from the reply memory",
+            ),
             agents_constants.DQN_CLEAN.BUFFER_SIZE: HParam(
-                value=1000, name=agents_constants.DQN_CLEAN.BUFFER_SIZE, descr="the replay memory buffer size"),
+                value=1000,
+                name=agents_constants.DQN_CLEAN.BUFFER_SIZE,
+                descr="the replay memory buffer size",
+            ),
             agents_constants.DQN_CLEAN.SAVE_MODEL: HParam(
-                value=False, name=agents_constants.DQN_CLEAN.SAVE_MODEL, descr="decision param for model saving"),
+                value=False,
+                name=agents_constants.DQN_CLEAN.SAVE_MODEL,
+                descr="decision param for model saving",
+            ),
             agents_constants.COMMON.LEARNING_RATE: HParam(
-                value=2.4e-5, name=agents_constants.COMMON.LEARNING_RATE,
-                descr="learning rate for updating the policy"),
+                value=2.4e-5,
+                name=agents_constants.COMMON.LEARNING_RATE,
+                descr="learning rate for updating the policy",
+            ),
             agents_constants.DQN_CLEAN.NUM_STEPS: HParam(
-                value=164, name=agents_constants.DQN_CLEAN.NUM_STEPS, descr="number of steps in each time step"),
+                value=164,
+                name=agents_constants.DQN_CLEAN.NUM_STEPS,
+                descr="number of steps in each time step",
+            ),
             constants.NEURAL_NETWORKS.DEVICE: HParam(
-                value="cpu", name=constants.NEURAL_NETWORKS.DEVICE, descr="the device to train on (cpu or cuda:x)"),
+                value="cpu",
+                name=constants.NEURAL_NETWORKS.DEVICE,
+                descr="the device to train on (cpu or cuda:x)",
+            ),
             agents_constants.COMMON.NUM_PARALLEL_ENVS: HParam(
-                value=1, name=agents_constants.COMMON.NUM_PARALLEL_ENVS,
-                descr="the nunmber of parallel environments for training"),
+                value=1,
+                name=agents_constants.COMMON.NUM_PARALLEL_ENVS,
+                descr="the nunmber of parallel environments for training",
+            ),
             agents_constants.COMMON.GAMMA: HParam(
-                value=0.99, name=agents_constants.COMMON.GAMMA, descr="the discount factor"),
+                value=0.99,
+                name=agents_constants.COMMON.GAMMA,
+                descr="the discount factor",
+            ),
             agents_constants.COMMON.NUM_TRAINING_TIMESTEPS: HParam(
-                value=int(100000), name=agents_constants.COMMON.NUM_TRAINING_TIMESTEPS,
-                descr="number of timesteps to train"),
-            agents_constants.COMMON.EVAL_EVERY: HParam(value=1, name=agents_constants.COMMON.EVAL_EVERY,
-                                                        descr="training iterations between evaluations"),
-            agents_constants.COMMON.EVAL_BATCH_SIZE: HParam(value=100, name=agents_constants.COMMON.EVAL_BATCH_SIZE,
-                                                            descr="the batch size for evaluation"),
-            agents_constants.COMMON.SAVE_EVERY: HParam(value=10000, name=agents_constants.COMMON.SAVE_EVERY,
-                                                       descr="how frequently to save the model"),
+                value=int(100000),
+                name=agents_constants.COMMON.NUM_TRAINING_TIMESTEPS,
+                descr="number of timesteps to train",
+            ),
+            agents_constants.COMMON.EVAL_EVERY: HParam(
+                value=1,
+                name=agents_constants.COMMON.EVAL_EVERY,
+                descr="training iterations between evaluations",
+            ),
+            agents_constants.COMMON.EVAL_BATCH_SIZE: HParam(
+                value=100,
+                name=agents_constants.COMMON.EVAL_BATCH_SIZE,
+                descr="the batch size for evaluation",
+            ),
+            agents_constants.COMMON.SAVE_EVERY: HParam(
+                value=10000,
+                name=agents_constants.COMMON.SAVE_EVERY,
+                descr="how frequently to save the model",
+            ),
             agents_constants.COMMON.CONFIDENCE_INTERVAL: HParam(
-                value=0.95, name=agents_constants.COMMON.CONFIDENCE_INTERVAL,
-                descr="confidence interval"),
+                value=0.95,
+                name=agents_constants.COMMON.CONFIDENCE_INTERVAL,
+                descr="confidence interval",
+            ),
             agents_constants.COMMON.MAX_ENV_STEPS: HParam(
-                value=500, name=agents_constants.COMMON.MAX_ENV_STEPS,
-                descr="maximum number of steps in the environment (for envs with infinite horizon generally)"),
+                value=500,
+                name=agents_constants.COMMON.MAX_ENV_STEPS,
+                descr="maximum number of steps in the environment (for envs with infinite horizon generally)",
+            ),
             agents_constants.COMMON.RUNNING_AVERAGE: HParam(
-                value=100, name=agents_constants.COMMON.RUNNING_AVERAGE,
-                descr="the number of samples to include when computing the running avg"),
-            agents_constants.COMMON.L: HParam(value=3, name=agents_constants.COMMON.L,
-                                              descr="the number of stop actions")
+                value=100,
+                name=agents_constants.COMMON.RUNNING_AVERAGE,
+                descr="the number of samples to include when computing the running avg",
+            ),
+            agents_constants.COMMON.L: HParam(
+                value=3,
+                name=agents_constants.COMMON.L,
+                descr="the number of stop actions",
+            ),
         },
-        player_type=PlayerType.DEFENDER, player_idx=0
+        player_type=PlayerType.DEFENDER,
+        player_idx=0,
     )
     simulation_env_config.simulation_env_input_config.attacker_strategy = TabularPolicy(
         player_type=PlayerType.ATTACKER,
-        actions=simulation_env_config.joint_action_space_config.action_spaces[1].actions,
-        simulation_name=simulation_env_config.name, value_function=None, q_table=None,
-        lookup_table=[
-            [0.8, 0.2],
-            [1, 0],
-            [1, 0]
-        ],
-        agent_type=AgentType.RANDOM, avg_R=-1)
-    agent = DQNCleanAgent(simulation_env_config=simulation_env_config, emulation_env_config=emulation_env_config,
-                          experiment_config=experiment_config, save_to_metastore=False)
+        actions=simulation_env_config.joint_action_space_config.action_spaces[
+            1
+        ].actions,
+        simulation_name=simulation_env_config.name,
+        value_function=None,
+        q_table=None,
+        lookup_table=[[0.8, 0.2], [1, 0], [1, 0]],
+        agent_type=AgentType.RANDOM,
+        avg_R=-1,
+    )
+    agent = DQNCleanAgent(
+        simulation_env_config=simulation_env_config,
+        emulation_env_config=emulation_env_config,
+        experiment_config=experiment_config,
+        save_to_metastore=False,
+    )
     experiment_execution = agent.train()
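
For intuition, below is a minimal sketch of how a random attacker policy like the TabularPolicy configured above can be read. It is not the csle TabularPolicy implementation; the row/column interpretation (one row per state, each row a probability distribution over the attacker's actions) and the helper name sample_attacker_action are assumptions made only for illustration.

import random

# lookup_table from the example above: presumably one row per state and,
# per row, a probability distribution over the attacker's actions.
# This reading is an assumption, not csle's documented API.
lookup_table = [[0.8, 0.2], [1, 0], [1, 0]]


def sample_attacker_action(state: int) -> int:
    """Sample an attacker action index from the table row for this state (illustrative helper)."""
    probs = lookup_table[state]
    return random.choices(range(len(probs)), weights=probs, k=1)[0]


if __name__ == "__main__":
    # In state 0 the attacker mixes 80/20 between its two actions;
    # in states 1 and 2 it always picks action 0.
    print([sample_attacker_action(0) for _ in range(10)])
    print(sample_attacker_action(1), sample_attacker_action(2))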
New file added in this commit (path not shown above)
@@ -0,0 +1,84 @@
+import csle_agents.constants.constants as agents_constants
+import csle_common.constants.constants as constants
+from csle_agents.agents.mcs.mcs_agent import MCSAgent
+from csle_agents.common.objective_type import ObjectiveType
+from csle_common.dao.training.agent_type import AgentType
+from csle_common.dao.training.experiment_config import ExperimentConfig
+from csle_common.dao.training.hparam import HParam
+from csle_common.dao.training.player_type import PlayerType
+from csle_common.dao.training.policy_type import PolicyType
+from csle_common.metastore.metastore_facade import MetastoreFacade
+
+if __name__ == "__main__":
+    emulation_name = "csle-level1-050"
+    emulation_env_config = MetastoreFacade.get_emulation_by_name(emulation_name)
+    if emulation_env_config is None:
+        raise ValueError(f"Could not find an emulation environment with the name: {emulation_name}")
+    simulation_name = "csle-stopping-pomdp-defender-002"
+    simulation_env_config = MetastoreFacade.get_simulation_by_name(simulation_name)
+    if simulation_env_config is None:
+        raise ValueError(f"Could not find a simulation with name: {simulation_name}")
+    experiment_config = ExperimentConfig(
+        output_dir=f"{constants.LOGGING.DEFAULT_LOG_DIR}particle_swarm_test",
+        title="Multilevel Coordinate Search",
+        random_seeds=[399, 98912],
+        agent_type=AgentType.MCS,
+        log_every=1,
+        hparams={
+            agents_constants.MCS.STEP: HParam(value=1000, name=agents_constants.MCS.STEP, descr="step"),
+            agents_constants.MCS.STEP1: HParam(value=10000, name=agents_constants.MCS.STEP1, descr="step1"),
+            agents_constants.MCS.U: HParam(value=[-20, -20, -20], name=agents_constants.MCS.U,
+                                           descr="initial lower corner"),
+            agents_constants.MCS.LOCAL: HParam(value=50, name=agents_constants.MCS.LOCAL,
+                                               descr="local value stating to which degree to perform local searches"),
+            agents_constants.MCS.V: HParam(value=[20, 20, 20], name=agents_constants.MCS.V,
+                                           descr="initial upper corner"),
+            agents_constants.MCS.STOPPING_ACTIONS: HParam(
+                value=3, name=agents_constants.MCS.L, descr="no. of stopping actions"),
+            agents_constants.COMMON.MAX_ENV_STEPS: HParam(
+                value=500, name=agents_constants.COMMON.MAX_ENV_STEPS,
+                descr="maximum number of steps in the environment (for envs with infinite horizon generally)"),
+            agents_constants.MCS.IINIT: HParam(
+                value=0, name=agents_constants.MCS.IINIT, descr="simple initialization list"),
+            agents_constants.MCS.GAMMA: HParam(
+                value=2.220446049250313e-16, name=agents_constants.MCS.GAMMA, descr="MCS gamma value"),
+            agents_constants.MCS.EPSILON: HParam(
+                value=2.220446049250313e-16, name=agents_constants.MCS.EPSILON, descr="MCS epsilon value"),
+            agents_constants.MCS.M: HParam(
+                value=1, name=agents_constants.MCS.M, descr="m value"),
+            agents_constants.MCS.PRT: HParam(
+                value=1, name=agents_constants.MCS.PRT, descr="print level"),
+            agents_constants.COMMON.EVAL_BATCH_SIZE: HParam(
+                value=10, name=agents_constants.COMMON.EVAL_BATCH_SIZE, descr="number of iterations to evaluate theta"),
+            agents_constants.COMMON.SAVE_EVERY: HParam(
+                value=1000, name=agents_constants.COMMON.SAVE_EVERY, descr="how frequently to save the model"),
+            agents_constants.COMMON.CONFIDENCE_INTERVAL: HParam(
+                value=0.95, name=agents_constants.COMMON.CONFIDENCE_INTERVAL, descr="confidence interval"),
+            agents_constants.COMMON.RUNNING_AVERAGE: HParam(
+                value=100, name=agents_constants.COMMON.RUNNING_AVERAGE,
+                descr="the number of samples to include when computing the running avg"),
+            agents_constants.COMMON.GAMMA: HParam(
+                value=0.99, name=agents_constants.COMMON.GAMMA, descr="the discount factor"),
+            agents_constants.MCS.POLICY_TYPE: HParam(
+                value=PolicyType.MULTI_THRESHOLD, name=agents_constants.PARTICLE_SWARM.POLICY_TYPE,
+                descr="policy type for the execution"),
+            agents_constants.MCS.OBJECTIVE_TYPE: HParam(
+                value=ObjectiveType.MAX, name=agents_constants.PARTICLE_SWARM.OBJECTIVE_TYPE, descr="Objective type"),
+        },
+        player_type=PlayerType.DEFENDER, player_idx=0,
+    )
+    agent = MCSAgent(
+        simulation_env_config=simulation_env_config, emulation_env_config=emulation_env_config,
+        experiment_config=experiment_config, save_to_metastore=False)
+    experiment_execution = agent.train()
+    # MetastoreFacade.save_experiment_execution(experiment_execution)
+    # for policy in experiment_execution.result.policies.values():
+    #     if experiment_config.hparams[agents_constants.PARTICLE_SWARM.POLICY_TYPE].value == PolicyType.MULTI_THRESHOLD:
+    #         MetastoreFacade.save_multi_threshold_stopping_policy(multi_threshold_stopping_policy=policy)
+    #     elif experiment_config.hparams[agents_constants.PARTICLE_SWARM.POLICY_TYPE].value \
+    #             == PolicyType.LINEAR_THRESHOLD:
+    #         MetastoreFacade.save_linear_threshold_stopping_policy(linear_threshold_stopping_policy=policy)
+    #     else:
+    #         raise ValueError("Policy type: "
+    #                          f"{experiment_config.hparams[agents_constants.PARTICLE_SWARM.POLICY_TYPE].value} "
+    #                          f"not recognized for MCS")

examples/training/nelder_mead/stopping_pompd_defender/run_vs_random_attacker_v_001.py

+1 −1
@@ -11,7 +11,7 @@
 from csle_agents.common.objective_type import ObjectiveType
 
 if __name__ == '__main__':
-    emulation_name = "csle-level9-030"
+    emulation_name = "csle-level1-050"
     emulation_env_config = MetastoreFacade.get_emulation_by_name(emulation_name)
     if emulation_env_config is None:
         raise ValueError(f"Could not find an emulation environment with the name: {emulation_name}")
