Skip to content

Commit 21f8773

Browse files
committed
castle rampart submission
1 parent 9fba5b5 commit 21f8773

File tree

11 files changed

+490
-18
lines changed

11 files changed

+490
-18
lines changed

examples/training/pomcp/cyborg_scenario_two_defender/README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# POMCP for defender plannign in cyborg
1+
# POMCP for defender planning in cyborg
22

33
## Commands
44

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
ppo_*
2+
*.log
3+
*.zipt
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# POMCP for defender planning in the RAMPART CASTLE environment
2+
3+
## Commands
4+
5+
To run a script, execute:
6+
```bash
7+
python <script_name>
8+
```
9+
10+
## Author & Maintainer
11+
12+
Kim Hammar <[email protected]>
13+
14+
## Copyright and license
15+
16+
[LICENSE](../../../../LICENSE.md)
17+
18+
Creative Commons
19+
20+
(C) 2020-2024, Kim Hammar
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,256 @@
1+
from typing import List, Tuple, Dict, Any
2+
import numpy.typing as npt
3+
from gym_csle_cyborg.dao.activity_type import ActivityType
4+
from csle_agents.agents.pomcp.pomcp_acquisition_function_type import POMCPAcquisitionFunctionType
5+
from gym_csle_cyborg.dao.csle_cyborg_wrapper_config import CSLECyborgWrapperConfig
6+
from gym_csle_cyborg.envs.cyborg_scenario_two_wrapper import CyborgScenarioTwoWrapper
7+
from gym_csle_cyborg.dao.red_agent_type import RedAgentType
8+
from gym_csle_cyborg.dao.csle_cyborg_config import CSLECyborgConfig
9+
from gym_csle_cyborg.util.cyborg_env_util import CyborgEnvUtil
10+
from csle_agents.agents.pomcp.pomcp import POMCP
11+
from gym_csle_cyborg.dao.blue_agent_action_type import BlueAgentActionType
12+
import gym_csle_cyborg.constants.constants as env_constants
13+
from csle_common.util.experiment_util import ExperimentUtil
14+
15+
16+
class CPOMCP:
    """
    Causal Partially Observable Monte-Carlo Planning (C-POMCP). A planning agent for Cage-2.
    Paper: https://arxiv.org/abs/2407.11070. Author: Kim Hammar (ORLANDO Siemens/KTH).
    """

    def __init__(self, ckpt_dir=None) -> None:
        """
        Initializes the agent: seeds the experiment, stores the planner hyperparameters, creates the
        environment that is handed to the planner, builds the CybORG lookup tables, and finally sets up
        the per-episode state by calling end_episode()

        :param ckpt_dir: not used by this agent (NOTE(review): presumably kept so that the constructor
                         matches the interface expected by the caller -- confirm)
        """
        ExperimentUtil.set_seed(1258192)

        # Planner hyperparameters; forwarded unchanged to POMCP in end_episode() / get_action()
        self.gamma = 0.99
        self.c = 0.5
        self.c2 = 15000
        self.planning_time = 10
        self.max_particles = 500
        # Constant rollout policy: always returns action 35, whatever the input
        self.rollout_policy = lambda x, deterministic: 35
        # Constant value function: always returns 0
        self.value_function = lambda x: 0
        self.reinvigoration = False
        self.verbose = False
        self.default_node_value = 0
        self.prior_weight = 5
        self.prior_confidence = 0
        self.reinvigorated_particles_ratio = 0.0
        self.prune_action_space = False
        self.prune_size = 3
        self.acquisition_function_type = POMCPAcquisitionFunctionType.UCB
        self.use_rollout_policy = False
        self.rollout_depth = 4
        self.planning_depth = 50

        # Wrapper environment that is passed to POMCP as its `env` (see end_episode)
        self.train_env_config = CSLECyborgWrapperConfig(
            gym_env_name="csle-cyborg-scenario-two-wrapper-v1", maximum_steps=100, save_trace=False, scenario=2,
            reward_shaping=True, red_agent_type=RedAgentType.B_LINE_AGENT)
        self.train_env = CyborgScenarioTwoWrapper(config=self.train_env_config)

        # CybORG configuration; used to obtain the hostname/subnet/action lookup tables below
        self.cyborg_config = CSLECyborgConfig(
            gym_env_name="csle-cyborg-scenario-two-v1", scenario=2, baseline_red_agents=[RedAgentType.B_LINE_AGENT],
            maximum_steps=100, red_agent_distribution=[1.0], reduced_action_space=True, scanned_state=True,
            decoy_state=True, decoy_optimization=False, cache_visited_states=True, save_trace=False,
            randomize_topology=False)
        (self.cyborg_scenario_config_path, self.cyborg_challenge_env, self.cyborg_hostnames,
         self.cyborg_hostname_to_id, self.cyborg_subnets, self.cyborg_subnet_to_id,
         self.cyborg_action_id_to_type_and_host, self.cyborg_action_type_and_host_to_id,
         self.red_agent_type) = CyborgEnvUtil.setup_cyborg_env(config=self.cyborg_config)
        self.decoy_action_types = CyborgEnvUtil.get_decoy_action_types(scenario=self.cyborg_config.scenario)
        self.decoy_actions_per_host = CyborgEnvUtil.get_decoy_actions_per_host(scenario=self.cyborg_config.scenario)

        # Creates the planner and all remaining per-episode attributes (t, scan_state, decoy_state, ...)
        self.end_episode()

    def get_action(self, obs: npt.NDArray[Any], action_space: Any = None) -> int:
        """
        Gets the next action. From the second time step of an episode onward, the observation is first
        converted to an observation id and used to update the search tree of the planner. The planner is
        then run, and the action it selects is translated to a CybORG action id.

        :param obs: the latest observation (a flat vector from which four entries are read per host)
        :param action_space: the action space (not used)
        :return: the next action (integer)
        """
        # At t=1 no action has been taken yet (action_sequence is empty), so there is nothing to update
        if self.t > 1:
            obs_id, scan_state = CPOMCP.update_scan_state(
                obs=obs, cyborg_hostnames=self.cyborg_hostnames,
                scan_state=self.scan_state, decoy_state=self.decoy_state)
            self.scan_state = scan_state
            self.pomcp.update_tree_with_new_samples(action_sequence=self.action_sequence, observation=obs_id,
                                                    t=self.t - 1)
        self.pomcp.solve(max_rollout_depth=self.rollout_depth, max_planning_depth=self.planning_depth, t=self.t)
        action = self.pomcp.get_action()
        # The history stores the planner's own action id, not the CybORG id returned to the caller
        self.action_sequence.append(action)
        cyborg_action, decoy_state = CPOMCP.encode_action(
            action=action, action_id_to_type_and_host=self.action_id_to_type_and_host,
            cyborg_action_type_and_host_to_id=self.cyborg_action_type_and_host_to_id,
            decoy_action_types=self.decoy_action_types, decoy_actions_per_host=self.decoy_actions_per_host,
            decoy_state=self.decoy_state, cyborg_hostname_to_id=self.cyborg_hostname_to_id,
            cyborg_action_id_to_type_and_host=self.cyborg_action_id_to_type_and_host)
        self.decoy_state = decoy_state
        self.t += 1
        return cyborg_action

    def end_episode(self) -> bool:
        """
        Cleans up the state for a new episode: clears the action history, resets the time step to 1,
        resets the planning environment, creates a fresh POMCP planner, resets the scan and decoy
        states and rebuilds the action and decoy-state lookup tables

        :return: True
        """
        self.action_sequence = []
        self.t = 1
        self.train_env.reset()
        self.pomcp = POMCP(
            A=self.train_env.get_action_space(), gamma=self.gamma, env=self.train_env, c=self.c,
            initial_particles=self.train_env.initial_particles, planning_time=self.planning_time,
            max_particles=self.max_particles, rollout_policy=self.rollout_policy, value_function=self.value_function,
            reinvigoration=self.reinvigoration, verbose=self.verbose, default_node_value=self.default_node_value,
            prior_weight=self.prior_weight, acquisition_function_type=self.acquisition_function_type, c2=self.c2,
            use_rollout_policy=self.use_rollout_policy, prior_confidence=self.prior_confidence,
            reinvigorated_particles_ratio=self.reinvigorated_particles_ratio,
            prune_action_space=self.prune_action_space, prune_size=self.prune_size)
        self.scan_state, self.decoy_state = CPOMCP.reset_scan_and_decoy_states(
            cyborg_hostnames=self.cyborg_hostnames)
        self.action_id_to_type_and_host, self.type_and_host_to_action_id = CyborgEnvUtil.get_action_dicts(
            scenario=self.cyborg_config.scenario, decoy_state=self.cyborg_config.decoy_state,
            reduced_action_space=self.cyborg_config.reduced_action_space,
            decoy_optimization=self.cyborg_config.decoy_optimization)
        states, lookup_table, hosts_lookup_tables = CyborgEnvUtil.get_decoy_state_space(config=self.cyborg_config)
        self.decoy_hosts = CyborgEnvUtil.get_decoy_hosts(scenario=self.cyborg_config.scenario)
        self.decoy_state_space = states
        self.decoy_state_space_lookup = lookup_table
        self.decoy_state_space_hosts_lookup = hosts_lookup_tables
        self.observation_id_to_tensor: Dict[int, npt.NDArray[Any]] = {}
        self.initial_belief = {1: 1.0}
        return True

    @staticmethod
    def reset_scan_and_decoy_states(cyborg_hostnames: List[str]) -> Tuple[List[int], List[List[BlueAgentActionType]]]:
        """
        Resets the scan and decoy states

        :param cyborg_hostnames: list of cyborg hostnames
        :return: the reset scan state (NOT_SCANNED for every host) and the reset decoy state
                 (a separate empty list for every host)
        """
        scan_state: List[int] = [env_constants.CYBORG.NOT_SCANNED for _ in cyborg_hostnames]
        # One independent list per host; they are appended to individually in encode_action
        decoy_state: List[List[BlueAgentActionType]] = [[] for _ in cyborg_hostnames]
        return scan_state, decoy_state

    @staticmethod
    def encode_action(action: int, action_id_to_type_and_host: Dict[int, Tuple[BlueAgentActionType, str]],
                      cyborg_action_type_and_host_to_id: Dict[Tuple[BlueAgentActionType, str], int],
                      decoy_action_types: List[BlueAgentActionType], cyborg_hostname_to_id: Dict[str, int],
                      decoy_actions_per_host: List[List[BlueAgentActionType]],
                      decoy_state: List[List[BlueAgentActionType]],
                      cyborg_action_id_to_type_and_host: Dict[int, Tuple[BlueAgentActionType, str]]) \
            -> Tuple[int, List[List[BlueAgentActionType]]]:
        """
        Encodes an action into a cyborg action.

        A decoy action is replaced by the first decoy of the targeted host that is not yet recorded in the
        decoy state; if every decoy of that host is already recorded, the action is replaced by REMOVE on
        the same host. A RESTORE action clears the recorded decoys of its host.

        Note: decoy_state is modified in place; the returned list is the same object that was passed in.

        :param action: the action to encode
        :param action_id_to_type_and_host: a dict to convert from action id to type and host
        :param cyborg_action_type_and_host_to_id: a dict to convert from cyborg action type and host
                                                  to cyborg action id
        :param decoy_action_types: types of decoy actions
        :param cyborg_hostname_to_id: a dict to convert from cyborg hostname to id
        :param decoy_actions_per_host: a list of decoy actions per host
        :param decoy_state: the decoy state of the environment
        :param cyborg_action_id_to_type_and_host: a dict to convert from cyborg action id to action type and host
        :return: the encoded action and the updated decoy state
        """
        action_type, host = action_id_to_type_and_host[action]
        action = cyborg_action_type_and_host_to_id[(action_type, host)]
        if action_type in decoy_action_types:
            host_id = cyborg_hostname_to_id[host]
            for decoy_action in decoy_actions_per_host[host_id]:
                if decoy_action not in decoy_state[host_id]:
                    action_type = decoy_action
                    # Look the id up before recording the decoy so a failed lookup leaves the state untouched
                    action = cyborg_action_type_and_host_to_id[(action_type, host)]
                    decoy_state[host_id].append(action_type)
                    break
            else:
                # No unused decoy left for this host
                action_type = BlueAgentActionType.REMOVE
                action = cyborg_action_type_and_host_to_id[(action_type, host)]
        action_type, host = cyborg_action_id_to_type_and_host[action]
        # Restore action removes decoys
        if action_type == BlueAgentActionType.RESTORE:
            host_id = cyborg_hostname_to_id[host]
            decoy_state[host_id] = []
        return action, decoy_state

    @staticmethod
    def update_scan_state(obs: npt.NDArray[Any], cyborg_hostnames: List[str], scan_state: List[int],
                          decoy_state: List[List[BlueAgentActionType]]) -> Tuple[int, List[int]]:
        """
        Updates the scan state and computes the id of the observation.

        The observation is read four entries per host, in the order of cyborg_hostnames: the first two
        entries encode the activity and the last two entries encode the compromised level.

        The scan_state list that is passed in is never modified: if a scan is observed a new list is
        created, otherwise the same list is returned unchanged.

        :param obs: the latest cyborg observation
        :param cyborg_hostnames: the hostnames
        :param scan_state: the scan state
        :param decoy_state: the decoy state
        :return: The observation id and the updated scan state
        """
        obs_per_host = []
        for i in range(len(cyborg_hostnames)):
            offset = 4 * i
            host_vector_obs = obs[offset:offset + 4].tolist()
            host_obs = {}

            compromised_bits = host_vector_obs[2:]
            if compromised_bits == [1, 1]:
                host_obs[env_constants.CYBORG.COMPROMISED] = 2
            elif compromised_bits == [0, 1]:
                host_obs[env_constants.CYBORG.COMPROMISED] = 1
            elif compromised_bits == [1, 0]:
                host_obs[env_constants.CYBORG.COMPROMISED] = 3
            else:
                host_obs[env_constants.CYBORG.COMPROMISED] = 0

            activity_bits = host_vector_obs[0:2]
            if activity_bits == [1, 1]:
                host_obs[env_constants.CYBORG.ACTIVITY] = 2
            elif activity_bits == [0, 1] or activity_bits == [1, 0]:
                host_obs[env_constants.CYBORG.ACTIVITY] = 1
            else:
                host_obs[env_constants.CYBORG.ACTIVITY] = 0

            if host_obs[env_constants.CYBORG.ACTIVITY] == ActivityType.SCAN:
                # Every host previously marked 2 is demoted to 1 and only this host is marked 2,
                # so at most one host carries the value 2 after a scan is observed
                scan_state = [1 if x == 2 else x for x in scan_state]
                scan_state[i] = 2
            host_obs[env_constants.CYBORG.SCANNED_STATE] = scan_state[i]
            # The decoy component of the observation is the number of decoys recorded for the host
            host_obs[env_constants.CYBORG.DECOY_STATE] = len(decoy_state[i])
            obs_per_host.append(host_obs)
        obs_id = CPOMCP.observation_id(obs_per_host=obs_per_host)
        return obs_id, scan_state

    @staticmethod
    def observation_id(obs_per_host: List[Dict[Any, int]]) -> int:
        """
        Gets the current observation id

        :param obs_per_host: a list with one dict per host, each holding the ACTIVITY, SCANNED_STATE,
                             COMPROMISED and DECOY_STATE values of that host
        :return: the current observation id
        """
        # The order of the four components within each host vector is fixed by this list
        host_obs_vecs = [
            [host_obs[env_constants.CYBORG.ACTIVITY], host_obs[env_constants.CYBORG.SCANNED_STATE],
             host_obs[env_constants.CYBORG.COMPROMISED], host_obs[env_constants.CYBORG.DECOY_STATE]]
            for host_obs in obs_per_host
        ]
        obs_id = CyborgEnvUtil.state_vector_to_state_id(state_vector=host_obs_vecs, observation=True)
        return obs_id
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
from model import CPOMCP
2+
import numpy as np
3+
4+
if __name__ == '__main__':
5+
cpomcp = CPOMCP()
6+
# t=1
7+
print("Computing the next action..")
8+
a1 = cpomcp.get_action(np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
10+
0, 0, 0, 0, 0, 0, 0, 0]))
11+
print(f"The next action is: {a1}")
12+
# t=2
13+
print("Computing the next action..")
14+
a2 = cpomcp.get_action(np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
15+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
16+
0, 0, 0, 0, 0, 0, 0, 0]))
17+
print(f"The next action is: {a2}")
18+
# t=3
19+
print("Computing the next action..")
20+
a3 = cpomcp.get_action(np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
21+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
22+
0, 0, 0, 0, 0, 0, 0, 0]))
23+
print(f"The next action is: {a3}")
24+
# t=4
25+
print("Computing the next action..")
26+
a4 = cpomcp.get_action(np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
27+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1,
28+
0, 0, 0, 0, 0, 0, 0, 0]))
29+
print(f"The next action is: {a4}")
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
csle-common==0.7.1
2+
csle-agents==0.7.1
3+
gym-csle-cyborg==0.7.1
4+
numpy==1.23.5

0 commit comments

Comments
 (0)