Skip to content

Commit f858728

Browse files
committed
posg solve example
1 parent 3c1b4a7 commit f858728

File tree

4 files changed

+345
-1
lines changed

4 files changed

+345
-1
lines changed
+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# HSVI c++
2+
3+
This directory contains example scripts for solving OS-POSGs using [hsvi](https://www.sciencedirect.com/science/article/pii/S0004370222001783).
4+
5+
Command for running hsvi with game file "apt_game.posg", 0.01 epsilon (target precision),
6+
4 pDelta (presolve delta, which determines the length of the presolve phase), and 2000 pLimit (presolve time limit):
7+
```bash
8+
./StochasticGamesCpp games/apt_game.posg 0.01 4 2000
9+
```
10+
11+
## Author & Maintainer
12+
13+
Kim Hammar <[email protected]>
14+
15+
## Copyright and license
16+
17+
[LICENSE](../../../LICENSE.md)
18+
19+
Creative Commons
20+
21+
(C) 2020-2024, Kim Hammar
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
import numpy as np
2+
from csle_tolerance.dao.intrusion_recovery_game_config import IntrusionRecoveryGameConfig
3+
from csle_tolerance.util.intrusion_recovery_pomdp_util import IntrusionRecoveryPomdpUtil
4+
5+
if __name__ == '__main__':
    # Short alias for the utility class; every helper below is called on it.
    util = IntrusionRecoveryPomdpUtil

    # Parameters of the intrusion recovery game
    eta = 8
    p_a = 1
    p_c_1 = 0.01
    BTR = np.inf  # also passed as the time horizon T of the config below
    negate_costs = False
    discount_factor = 0.999
    num_observations = 10
    simulation_name = "csle-tolerance-intrusion-recovery-pomdp-defender-001"

    # Tensors describing the costs, observations and transitions of the game
    cost_tensor = util.cost_tensor(
        eta=eta,
        states=util.state_space(),
        actions=util.action_space(),
        negate=negate_costs)
    observation_tensor = util.observation_tensor(
        states=util.state_space(),
        observations=util.observation_space(num_observations=num_observations))
    transition_tensor = util.transition_tensor_game(
        states=util.state_space(),
        defender_actions=util.action_space(),
        attacker_actions=util.action_space(),
        p_a=p_a,
        p_c_1=p_c_1)

    config = IntrusionRecoveryGameConfig(
        eta=eta,
        p_a=p_a,
        p_c_1=p_c_1,
        BTR=BTR,
        negate_costs=negate_costs,
        seed=999,
        discount_factor=discount_factor,
        states=util.state_space(),
        actions=util.action_space(),
        observations=util.observation_space(num_observations=num_observations),
        cost_tensor=cost_tensor,
        observation_tensor=observation_tensor,
        transition_tensor=transition_tensor,
        b1=util.initial_belief(p_a=p_a),
        T=BTR,
        simulation_env_name=simulation_name,
        gym_env_name="csle-tolerance-intrusion-recovery-pomdp-v1"
    )

    # Optional sanity check of the sampled dynamics (disabled):
    # s = 0
    # for i in range(100):
    #     s = IntrusionRecoveryPomdpUtil.sample_next_state_game(transition_tensor=config.transition_tensor, s=s,
    #                                                           a1=0, a2=1)
    #     c = config.cost_tensor[0][s]
    #     print(f"cost: {c}, s: {s}")

    # Generates the game file; the returned string is not used here.
    util.generate_os_posg_game_file(game_config=config)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
from typing import List, Dict, Any
2+
import numpy as np
3+
from csle_common.dao.simulation_config.simulation_env_input_config import SimulationEnvInputConfig
4+
5+
6+
class IntrusionRecoveryGameConfig(SimulationEnvInputConfig):
    """
    DTO containing the configuration of an intrusion recovery POSG
    """

    def __init__(self, eta: float, p_a: float, p_c_1: float, BTR: int, negate_costs: bool,
                 seed: int, discount_factor: float, states: List[int], actions: List[int], observations: List[int],
                 cost_tensor: List[List[float]], observation_tensor: List[List[float]],
                 transition_tensor: List[List[List[List[float]]]], b1: List[float], T: int, simulation_env_name: str,
                 gym_env_name: str, max_horizon: float = np.inf) -> None:
        """
        Initializes the DTO

        :param eta: the scaling factor for the cost function
        :param p_a: the intrusion probability
        :param p_c_1: the crash probability in the healthy state
        :param BTR: the periodic recovery interval
        :param negate_costs: boolean flag indicating whether costs should be negated or not
        :param seed: the random seed
        :param discount_factor: the discount factor
        :param states: the list of states
        :param actions: the list of actions
        :param observations: the list of observations
        :param cost_tensor: the cost tensor
        :param observation_tensor: the observation tensor
        :param transition_tensor: the transition tensor
        :param b1: the initial belief
        :param T: the time horizon
        :param simulation_env_name: name of the simulation environment
        :param gym_env_name: name of the gym environment
        :param max_horizon: the maximum horizon to avoid infinite simulations
        """
        self.eta = eta
        self.p_a = p_a
        self.p_c_1 = p_c_1
        self.BTR = BTR
        self.negate_costs = negate_costs
        self.seed = seed
        self.discount_factor = discount_factor
        self.states = states
        self.actions = actions
        self.observations = observations
        self.cost_tensor = cost_tensor
        self.observation_tensor = observation_tensor
        self.transition_tensor = transition_tensor
        self.b1 = b1
        self.T = T
        self.simulation_env_name = simulation_env_name
        self.gym_env_name = gym_env_name
        self.max_horizon = max_horizon

    def __str__(self) -> str:
        """
        :return: a string representation of the DTO
        """
        # The "observations" field prints self.observations (it previously printed the observation tensor twice).
        return (f"eta: {self.eta}, p_a: {self.p_a}, p_c_1: {self.p_c_1},"
                f"BTR: {self.BTR}, negate_costs: {self.negate_costs}, seed: {self.seed}, "
                f"discount_factor: {self.discount_factor}, states: {self.states}, actions: {self.actions}, "
                f"observations: {self.observations}, cost_tensor: {self.cost_tensor}, "
                f"observation_tensor: {self.observation_tensor}, transition_tensor: {self.transition_tensor}, "
                f"b1:{self.b1}, T: {self.T}, simulation_env_name: {self.simulation_env_name}, "
                f"gym_env_name: {self.gym_env_name}, max_horizon: {self.max_horizon}")

    @staticmethod
    def from_dict(d: Dict[str, Any]) -> "IntrusionRecoveryGameConfig":
        """
        Converts a dict representation to an instance

        :param d: the dict to convert
        :return: the created instance
        """
        dto = IntrusionRecoveryGameConfig(
            eta=d["eta"], p_a=d["p_a"], p_c_1=d["p_c_1"], BTR=d["BTR"],
            negate_costs=d["negate_costs"], seed=d["seed"], discount_factor=d["discount_factor"], states=d["states"],
            actions=d["actions"], observations=d["observations"], cost_tensor=d["cost_tensor"],
            observation_tensor=d["observation_tensor"], transition_tensor=d["transition_tensor"], b1=d["b1"],
            T=d["T"], simulation_env_name=d["simulation_env_name"], gym_env_name=d["gym_env_name"],
            # Dicts written before "max_horizon" was serialized lack the key; fall back to the constructor default.
            max_horizon=d.get("max_horizon", np.inf))
        return dto

    def to_dict(self) -> Dict[str, Any]:
        """
        Gets a dict representation of the object

        :return: A dict representation of the object
        """
        d: Dict[str, Any] = {
            "eta": self.eta,
            "p_a": self.p_a,
            "p_c_1": self.p_c_1,
            "BTR": self.BTR,
            "negate_costs": self.negate_costs,
            "seed": self.seed,
            "discount_factor": self.discount_factor,
            "states": self.states,
            "actions": self.actions,
            "observations": self.observations,
            "cost_tensor": self.cost_tensor,
            "observation_tensor": self.observation_tensor,
            "transition_tensor": self.transition_tensor,
            "b1": self.b1,
            "T": self.T,
            "simulation_env_name": self.simulation_env_name,
            # Store the gym environment name itself (previously the simulation name was stored under this key).
            "gym_env_name": self.gym_env_name,
            # Included so that from_dict(to_dict()) preserves the maximum horizon.
            "max_horizon": self.max_horizon,
        }
        return d

    @staticmethod
    def from_json_file(json_file_path: str) -> "IntrusionRecoveryGameConfig":
        """
        Reads a json file and converts it to a DTO

        :param json_file_path: the json file path
        :return: the converted DTO
        """
        import json
        with open(json_file_path, 'r') as f:
            return IntrusionRecoveryGameConfig.from_dict(json.load(f))

simulation-system/libs/csle-tolerance/src/csle_tolerance/util/intrusion_recovery_pomdp_util.py

+161-1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from scipy.stats import betabinom
33
import numpy as np
44
from csle_tolerance.dao.intrusion_recovery_pomdp_config import IntrusionRecoveryPomdpConfig
5+
from csle_tolerance.dao.intrusion_recovery_game_config import IntrusionRecoveryGameConfig
56

67

78
class IntrusionRecoveryPomdpUtil:
@@ -26,7 +27,7 @@ def initial_belief(p_a: float) -> List[float]:
2627
:param p_a: the attack probability
2728
:return: the initial belief state
2829
"""
29-
return [1 - p_a, p_a, 0]
30+
return [1, 0, 0]
3031

3132
@staticmethod
3233
def action_space() -> List[int]:
@@ -165,6 +166,33 @@ def transition_function(s: int, s_prime: int, a: int, p_a: float, p_c_1: float,
165166
else:
166167
return 0
167168

169+
@staticmethod
170+
def transition_function_game(s: int, s_prime: int, a1: int, a2: int, p_a: float, p_c_1: float) -> float:
171+
"""
172+
The transition function of the POSG
173+
174+
:param s: the state
175+
:param s_prime: the next state
176+
:param a1: the defender action
177+
:param a2: the attacker action
178+
:param p_a: the intrusion probability
179+
:param p_c_1: the crash probability
180+
:return: P(s_prime | s, a1, a2)
181+
"""
182+
if s == 2 and s_prime == 2:
183+
return 1.0
184+
elif s_prime == 2 and s in [0, 1]:
185+
return p_c_1
186+
elif s_prime == 0 and a1 == 0 and a2 == 1 and s == 0:
187+
return (1 - p_a) * (1 - p_c_1)
188+
elif (s_prime == 0 and a2 == 0 and s == 0) or (s_prime == 0 and s == 1 and a1 == 1) \
189+
or (s_prime == 1 and s == 1 and a1 == 0):
190+
return (1 - p_c_1)
191+
elif (s_prime == 1 and s == 0 and a2 == 1):
192+
return (1 - p_c_1) * p_a
193+
else:
194+
return 0
195+
168196
@staticmethod
169197
def transition_tensor(states: List[int], actions: List[int], p_a: float, p_c_1: float, p_c_2: float, p_u: float) \
170198
-> List[List[List[float]]]:
@@ -187,10 +215,39 @@ def transition_tensor(states: List[int], actions: List[int], p_a: float, p_c_1:
187215
for s_prime in states:
188216
s_a_transitions.append(IntrusionRecoveryPomdpUtil.transition_function(
189217
s=s, s_prime=s_prime, a=a, p_a=p_a, p_c_1=p_c_1, p_c_2=p_c_2, p_u=p_u))
218+
assert round(sum(s_a_transitions), 2) == 1.0
190219
a_transitions.append(s_a_transitions)
191220
transition_tensor.append(a_transitions)
192221
return transition_tensor
193222

223+
@staticmethod
def transition_tensor_game(states: List[int], defender_actions: List[int], attacker_actions: List[int],
                           p_a: float, p_c_1: float) -> List[List[List[List[float]]]]:
    """
    Builds the transition tensor of the POSG, indexed as tensor[a1][a2][s][s_prime]

    :param states: the list of states
    :param defender_actions: the list of defender actions
    :param attacker_actions: the list of attacker actions
    :param p_a: the intrusion probability
    :param p_c_1: the crash probability
    :return: the |A|x|A|x|S|x|S| transition tensor
    """
    transition_prob = IntrusionRecoveryPomdpUtil.transition_function_game
    return [
        [
            [
                [
                    transition_prob(s=s, s_prime=s_prime, a1=a1, a2=a2, p_a=p_a, p_c_1=p_c_1)
                    for s_prime in states
                ]
                for s in states
            ]
            for a2 in attacker_actions
        ]
        for a1 in defender_actions
    ]
250+
194251
@staticmethod
195252
def sample_initial_state(b1: List[float]) -> int:
196253
"""
@@ -217,6 +274,20 @@ def sample_next_observation(observation_tensor: List[List[float]], s_prime: int,
217274
o = np.random.choice(np.arange(0, len(observations)), p=observation_probs)
218275
return int(o)
219276

277+
@staticmethod
278+
def sample_next_state_game(transition_tensor: List[List[List[List[float]]]], s: int, a1: int, a2: int) -> int:
279+
"""
280+
Samples the next observation
281+
282+
:param s: the current state
283+
:param a1: the defender action
284+
:param a2: the attacker action
285+
:param transition_tensor: the transition tensor
286+
:return: the next state a
287+
"""
288+
s_prime = np.random.choice(np.arange(0, len(transition_tensor[a1][a2][s])), p=transition_tensor[a1][a2][s])
289+
return int(s_prime)
290+
220291
@staticmethod
221292
def bayes_filter(s_prime: int, o: int, a: int, b: List[float], states: List[int], observations: List[int],
222293
observation_tensor: List[List[float]], transition_tensor: List[List[List[float]]]) -> float:
@@ -342,3 +413,92 @@ def pomdp_solver_file(config: IntrusionRecoveryPomdpConfig) -> str:
342413
c = config.cost_tensor[a][s]
343414
file_str = file_str + f"R: {a} : {s} : {s_prime} : {o} {c:.80f}\n"
344415
return file_str
416+
417+
@staticmethod
def generate_transitions(game_config: IntrusionRecoveryGameConfig) -> List[str]:
    """
    Generates the transition rows of the POSG config file of HSVI

    Each row has the form "s a1 a2 o s_prime prob" where prob is the product of the transition
    probability and the observation probability. Rows with non-positive probability are omitted.

    :param game_config: the game configuration
    :return: list of transition rows
    """
    rows = []
    for s in game_config.states:
        for a1 in game_config.actions:
            for a2 in game_config.actions:
                for s_prime in game_config.states:
                    for obs_idx in range(len(game_config.observations)):
                        # NOTE(review): the observation tensor is indexed by the attacker action a2 here,
                        # not by s_prime -- confirm that this is the intended observation model.
                        joint_prob = (game_config.transition_tensor[a1][a2][s][s_prime]
                                      * game_config.observation_tensor[a2][obs_idx])
                        if joint_prob > 0:
                            rows.append(f"{s} {a1} {a2} {obs_idx} {s_prime} {joint_prob}")

    return rows
439+
440+
@staticmethod
def generate_rewards(game_config: IntrusionRecoveryGameConfig) -> List[str]:
    """
    Generates the reward rows of the POSG config file of HSVI

    Each row has the form "s a1 a2 r" where r is the negated cost of taking defender action a1 in state s.
    Rows whose reward equals zero are omitted.

    :param game_config: the game configuration
    :return: list of reward rows
    """
    rows = []
    for s in game_config.states:
        for a1 in game_config.actions:
            # The reward does not depend on the attacker action, so it is computed once per (s, a1)
            reward = -game_config.cost_tensor[a1][s]
            if reward != 0:
                rows.extend(f"{s} {a1} {a2} {reward}" for a2 in game_config.actions)
    return rows
457+
458+
@staticmethod
def generate_os_posg_game_file(game_config: IntrusionRecoveryGameConfig,
                               file_path: str = "recovery_game.txt") -> str:
    """
    Generates the POSG game file for HSVI and writes it to disk

    The file consists of the following sections, each row on its own line: a header row with the counts and
    the discount factor, the state rows, the player 1 action names, the player 2 action names,
    the observation names, the legal player 2 actions (one row per state), the legal player 1 actions
    (a single row), the transition rows, the reward rows and finally the initial belief.

    :param game_config: the game configuration
    :param file_path: path of the file to write; defaults to "recovery_game.txt" in the working directory
    :return: a string with the contents of the config file
    """
    num_partitions = 1
    transitions = IntrusionRecoveryPomdpUtil.generate_transitions(game_config=game_config)
    rewards = IntrusionRecoveryPomdpUtil.generate_rewards(game_config=game_config)
    game_description = f"{len(game_config.states)} {num_partitions} {len(game_config.actions)} " \
                       f"{len(game_config.actions)} " \
                       f"{len(game_config.observations)} {len(transitions)} " \
                       f"{len(rewards)} {game_config.discount_factor}"

    # Every state row ends with 0 (presumably the index of the single partition -- TODO confirm)
    state_descriptions = [f"{s} 0" for s in game_config.states]
    player_1_actions = ["WAIT", "RECOVER"]
    player_2_actions = ["FALSEALARM", "ATTACK"]
    obs_descriptions = [f"o_{o}" for o in game_config.observations]

    # All actions are legal: one row per state for player 2 and a single row for player 1
    all_actions_str = " ".join(map(str, game_config.actions))
    player_2_legal_actions = [all_actions_str for _ in game_config.states]
    player_1_legal_actions = [all_actions_str]

    # The belief row starts with 0 (presumably the initial partition -- TODO confirm)
    initial_belief_str = f"0 {' '.join(map(str, game_config.b1))}"

    # Every section is terminated by a newline, also when it is empty; the belief row ends the file
    # without a trailing newline.
    sections = [
        [game_description],
        state_descriptions,
        player_1_actions,
        player_2_actions,
        obs_descriptions,
        player_2_legal_actions,
        player_1_legal_actions,
        transitions,
        rewards,
    ]
    game_file_str = "".join("\n".join(section) + "\n" for section in sections) + initial_belief_str
    with open(file_path, 'w', encoding="utf-8") as f:
        f.write(game_file_str)
    return game_file_str

0 commit comments

Comments
 (0)