Commit 60c931f

belief aggregation, stopping
1 parent edae518 commit 60c931f

15 files changed: +236 -88 lines changed

examples/manual_play/intrusion_recovery_pomdp.py (+1 -1)

@@ -34,7 +34,7 @@
 actions=IntrusionRecoveryPomdpUtil.action_space(),
 observations=IntrusionRecoveryPomdpUtil.observation_space(num_observations=num_observations),
 cost_tensor=cost_tensor, observation_tensor=observation_tensor, transition_tensor=transition_tensor,
-b1=IntrusionRecoveryPomdpUtil.initial_belief(p_a=p_a), T=BTR,
+b1=IntrusionRecoveryPomdpUtil.initial_belief(), T=BTR,
 simulation_env_name=simulation_name, gym_env_name="csle-tolerance-intrusion-recovery-pomdp-v1"
 )
 env = gym.make("csle-tolerance-intrusion-recovery-pomdp-v1",

examples/training/bayesian_optimization/intrusion_recovery_pomdp_defender/run_v_001.py (+1 -1)

@@ -46,7 +46,7 @@
 actions=IntrusionRecoveryPomdpUtil.action_space(),
 observations=IntrusionRecoveryPomdpUtil.observation_space(num_observations=num_observations),
 cost_tensor=cost_tensor, observation_tensor=observation_tensor, transition_tensor=transition_tensor,
-b1=IntrusionRecoveryPomdpUtil.initial_belief(p_a=p_a), T=BTR,
+b1=IntrusionRecoveryPomdpUtil.initial_belief(), T=BTR,
 simulation_env_name=simulation_name, gym_env_name="csle-tolerance-intrusion-recovery-pomdp-v1"
 )
 simulation_env_config.simulation_env_input_config = input_config

examples/training/cross_entropy/intrusion_recovery_pomdp_defender/run_v_001.py (+1 -1)

@@ -46,7 +46,7 @@
 actions=IntrusionRecoveryPomdpUtil.action_space(),
 observations=IntrusionRecoveryPomdpUtil.observation_space(num_observations=num_observations),
 cost_tensor=cost_tensor, observation_tensor=observation_tensor, transition_tensor=transition_tensor,
-b1=IntrusionRecoveryPomdpUtil.initial_belief(p_a=p_a), T=BTR,
+b1=IntrusionRecoveryPomdpUtil.initial_belief(), T=BTR,
 simulation_env_name=simulation_name, gym_env_name="csle-tolerance-intrusion-recovery-pomdp-v1"
 )
 simulation_env_config.simulation_env_input_config = input_config

examples/training/differential_evolution/intrusion_recovery_pomdp_defender/run_v_001.py (+1 -1)

@@ -46,7 +46,7 @@
 actions=IntrusionRecoveryPomdpUtil.action_space(),
 observations=IntrusionRecoveryPomdpUtil.observation_space(num_observations=num_observations),
 cost_tensor=cost_tensor, observation_tensor=observation_tensor, transition_tensor=transition_tensor,
-b1=IntrusionRecoveryPomdpUtil.initial_belief(p_a=p_a), T=BTR,
+b1=IntrusionRecoveryPomdpUtil.initial_belief(), T=BTR,
 simulation_env_name=simulation_name, gym_env_name="csle-tolerance-intrusion-recovery-pomdp-v1"
 )
 simulation_env_config.simulation_env_input_config = input_config

examples/training/pomdp_solve/intrusion_recovery_pomdp/run_vs_random_attacker_v_001.py (+1 -1)

@@ -29,7 +29,7 @@
 actions=IntrusionRecoveryPomdpUtil.action_space(),
 observations=IntrusionRecoveryPomdpUtil.observation_space(num_observations=num_observations),
 cost_tensor=cost_tensor, observation_tensor=observation_tensor, transition_tensor=transition_tensor,
-b1=IntrusionRecoveryPomdpUtil.initial_belief(p_a=p_a), T=BTR,
+b1=IntrusionRecoveryPomdpUtil.initial_belief(), T=BTR,
 simulation_env_name=simulation_name, gym_env_name="csle-tolerance-intrusion-recovery-pomdp-v1"
 )
 pomdp_solve_file_str = IntrusionRecoveryPomdpUtil.pomdp_solver_file(config=config)

examples/training/posg_solve/intrusion_recovery_pomdp/run_vs_random_attacker_v_001.py (+1 -1)

@@ -26,7 +26,7 @@
 actions=IntrusionRecoveryPomdpUtil.action_space(),
 observations=IntrusionRecoveryPomdpUtil.observation_space(num_observations=num_observations),
 cost_tensor=cost_tensor, observation_tensor=observation_tensor, transition_tensor=transition_tensor,
-b1=IntrusionRecoveryPomdpUtil.initial_belief(p_a=p_a), T=BTR,
+b1=IntrusionRecoveryPomdpUtil.initial_belief(), T=BTR,
 simulation_env_name=simulation_name, gym_env_name="csle-tolerance-intrusion-recovery-pomdp-v1"
 )

examples/training/ppo/intrusion_recovery_pomdp_defender/run_v_001.py (+1 -1)

@@ -43,7 +43,7 @@
 actions=IntrusionRecoveryPomdpUtil.action_space(),
 observations=IntrusionRecoveryPomdpUtil.observation_space(num_observations=num_observations),
 cost_tensor=cost_tensor, observation_tensor=observation_tensor, transition_tensor=transition_tensor,
-b1=IntrusionRecoveryPomdpUtil.initial_belief(p_a=p_a), T=BTR,
+b1=IntrusionRecoveryPomdpUtil.initial_belief(), T=BTR,
 simulation_env_name=simulation_name, gym_env_name="csle-tolerance-intrusion-recovery-pomdp-v1"
 )
 input_config.max_horizon = 30

examples/training/random_search/intrusion_recovery_pomdp_defender/run_v_001.py (+1 -1)

@@ -45,7 +45,7 @@
 actions=IntrusionRecoveryPomdpUtil.action_space(),
 observations=IntrusionRecoveryPomdpUtil.observation_space(num_observations=num_observations),
 cost_tensor=cost_tensor, observation_tensor=observation_tensor, transition_tensor=transition_tensor,
-b1=IntrusionRecoveryPomdpUtil.initial_belief(p_a=p_a), T=BTR,
+b1=IntrusionRecoveryPomdpUtil.initial_belief(), T=BTR,
 simulation_env_name=simulation_name, gym_env_name="csle-tolerance-intrusion-recovery-pomdp-v1"
 )
 simulation_env_config.simulation_env_input_config = input_config

examples/training/t_spsa/intrusion_recovery_pomdp_defender/run_v_001.py (+1 -1)

@@ -44,7 +44,7 @@
 actions=IntrusionRecoveryPomdpUtil.action_space(),
 observations=IntrusionRecoveryPomdpUtil.observation_space(num_observations=num_observations),
 cost_tensor=cost_tensor, observation_tensor=observation_tensor, transition_tensor=transition_tensor,
-b1=IntrusionRecoveryPomdpUtil.initial_belief(p_a=p_a), T=BTR,
+b1=IntrusionRecoveryPomdpUtil.initial_belief(), T=BTR,
 simulation_env_name=simulation_name, gym_env_name="csle-tolerance-intrusion-recovery-pomdp-v1"
 )
 simulation_env_config.simulation_env_input_config = input_config

simulation-system/envs/intrusion_recovery_pomdp_defender/config_v_001.py (+2 -3)

@@ -262,8 +262,7 @@ def default_initial_state_distribution_config(p_a: float) -> InitialStateDistrib
 :return: the default initial state distribution configuration
 """
 initial_state_distribution_config = InitialStateDistributionConfig(
-    initial_state_distribution=IntrusionRecoveryPomdpUtil.initial_belief(p_a=p_a)
-)
+    initial_state_distribution=IntrusionRecoveryPomdpUtil.initial_belief())
 return initial_state_distribution_config

@@ -301,7 +300,7 @@ def default_input_config(eta: float, p_a: float, p_c_1: float, p_c_2: float, p_u
 actions=IntrusionRecoveryPomdpUtil.action_space(),
 observations=IntrusionRecoveryPomdpUtil.observation_space(num_observations=num_observations),
 cost_tensor=cost_tensor, observation_tensor=observation_tensor, transition_tensor=transition_tensor,
-b1=IntrusionRecoveryPomdpUtil.initial_belief(p_a=p_a), T=BTR,
+b1=IntrusionRecoveryPomdpUtil.initial_belief(), T=BTR,
 simulation_env_name=simulation_env_name, gym_env_name="csle-tolerance-intrusion-recovery-pomdp-v1"
 )
 return config

simulation-system/libs/csle-tolerance/src/csle_tolerance/util/intrusion_recovery_pomdp_util.py (+19 -70)

@@ -24,7 +24,7 @@ def state_space() -> List[int]:
 return [0, 1, 2]

 @staticmethod
-def initial_belief(p_a: float) -> List[float]:
+def initial_belief() -> List[float]:
 """
 Gets the initial belief state of the POMDP

@@ -72,9 +72,7 @@ def cost_function(s: int, a: int, eta: float, negate: bool = False) -> float:
 return cost

 @staticmethod
-def cost_tensor(
-    eta: float, states: List[int], actions: List[int], negate: bool = False
-) -> List[List[float]]:
+def cost_tensor(eta: float, states: List[int], actions: List[int], negate: bool = False) -> List[List[float]]:
 """
 Creates a |A|x|S| tensor with the costs (or rewards) of the POMDP

@@ -119,9 +117,7 @@ def observation_function(s: int, o: int, num_observations: int) -> float:
 return 0.0

 @staticmethod
-def observation_tensor(
-    states: List[int], observations: List[int]
-) -> List[List[float]]:
+def observation_tensor(states: List[int], observations: List[int]) -> List[List[float]]:
 """
 Creates a |S|x|O| tensor with the observation probabilities

@@ -143,9 +139,7 @@ def observation_tensor(
 return observation_tensor

 @staticmethod
-def transition_function(
-    s: int, s_prime: int, a: int, p_a: float, p_c_1: float, p_u: float, p_c_2: float
-) -> float:
+def transition_function(s: int, s_prime: int, a: int, p_a: float, p_c_1: float, p_u: float, p_c_2: float) -> float:
 """
 The transition function of the POMDP

@@ -184,9 +178,7 @@ def transition_function(
 return 0

 @staticmethod
-def transition_function_game(
-    s: int, s_prime: int, a1: int, a2: int, p_a: float, p_c_1: float
-) -> float:
+def transition_function_game(s: int, s_prime: int, a1: int, a2: int, p_a: float, p_c_1: float) -> float:
 """
 The transition function of the POSG

@@ -216,14 +208,8 @@ def transition_function_game(
 return 0

 @staticmethod
-def transition_tensor(
-    states: List[int],
-    actions: List[int],
-    p_a: float,
-    p_c_1: float,
-    p_c_2: float,
-    p_u: float,
-) -> List[List[List[float]]]:
+def transition_tensor(states: List[int], actions: List[int], p_a: float, p_c_1: float, p_c_2: float, p_u: float) \
+        -> List[List[List[float]]]:
 """
 Creates a |A|x|S|x|S| tensor with the transition probabilities of the POMDP

@@ -258,13 +244,8 @@ def transition_tensor(
 return transition_tensor

 @staticmethod
-def transition_tensor_game(
-    states: List[int],
-    defender_actions: List[int],
-    attacker_actions: List[int],
-    p_a: float,
-    p_c_1: float,
-) -> List[List[List[List[float]]]]:
+def transition_tensor_game(states: List[int], defender_actions: List[int], attacker_actions: List[int], p_a: float,
+                           p_c_1: float) -> List[List[List[List[float]]]]:
 """
 Creates a |A|x|A|x|S|x|S| tensor with the transition probabilities of the POSG

@@ -304,9 +285,7 @@ def sample_initial_state(b1: List[float]) -> int:
 return int(np.random.choice(np.arange(0, len(b1)), p=b1))

 @staticmethod
-def sample_next_observation(
-    observation_tensor: List[List[float]], s_prime: int, observations: List[int]
-) -> int:
+def sample_next_observation(observation_tensor: List[List[float]], s_prime: int, observations: List[int]) -> int:
 """
 Samples the next observation

@@ -322,9 +301,7 @@ def sample_next_observation(
 return int(o)

 @staticmethod
-def sample_next_state_game(
-    transition_tensor: List[List[List[List[float]]]], s: int, a1: int, a2: int
-) -> int:
+def sample_next_state_game(transition_tensor: List[List[List[List[float]]]], s: int, a1: int, a2: int) -> int:
 """
 Samples the next observation

@@ -341,16 +318,8 @@ def sample_next_state_game(
 return int(s_prime)

 @staticmethod
-def bayes_filter(
-    s_prime: int,
-    o: int,
-    a: int,
-    b: List[float],
-    states: List[int],
-    observations: List[int],
-    observation_tensor: List[List[float]],
-    transition_tensor: List[List[List[float]]],
-) -> float:
+def bayes_filter(s_prime: int, o: int, a: int, b: List[float], states: List[int], observations: List[int],
+                 observation_tensor: List[List[float]], transition_tensor: List[List[List[float]]]) -> float:
 """
 A Bayesian filter to compute b[s_prime] of the POMDP

@@ -386,14 +355,8 @@ def bayes_filter(
 return b_prime_s_prime

 @staticmethod
-def p_o_given_b_a1_a2(
-    o: int,
-    b: List[float],
-    a: int,
-    states: List[int],
-    transition_tensor: List[List[List[float]]],
-    observation_tensor: List[List[float]],
-) -> float:
+def p_o_given_b_a1_a2(o: int, b: List[float], a: int, states: List[int], transition_tensor: List[List[List[float]]],
+                      observation_tensor: List[List[float]]) -> float:
 """
 Computes P[o|a,b] of the POMDP

@@ -417,15 +380,8 @@ def p_o_given_b_a1_a2(
 return prob

 @staticmethod
-def next_belief(
-    o: int,
-    a: int,
-    b: List[float],
-    states: List[int],
-    observations: List[int],
-    observation_tensor: List[List[float]],
-    transition_tensor: List[List[List[float]]],
-) -> List[float]:
+def next_belief(o: int, a: int, b: List[float], states: List[int], observations: List[int],
+                observation_tensor: List[List[float]], transition_tensor: List[List[List[float]]]) -> List[float]:
 """
 Computes the next belief using a Bayesian filter

@@ -441,15 +397,8 @@ def next_belief(
 b_prime = [0.0] * len(states)
 for s_prime in states:
 b_prime[s_prime] = IntrusionRecoveryPomdpUtil.bayes_filter(
-    s_prime=s_prime,
-    o=o,
-    a=a,
-    b=b,
-    states=states,
-    observations=observations,
-    transition_tensor=transition_tensor,
-    observation_tensor=observation_tensor,
-)
+    s_prime=s_prime, o=o, a=a, b=b, states=states, observations=observations,
+    transition_tensor=transition_tensor, observation_tensor=observation_tensor)
 if round(sum(b_prime), 2) != 1:
 print(f"error, b_prime:{b_prime}, o:{o}, a:{a}, b:{b}")
 assert round(sum(b_prime), 2) == 1
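
For reference, the sketch below shows how the reformatted next_belief signature is called after this change. It is not taken from the commit: the two-observation space and the tensor values are made-up illustrative numbers, whereas in the repository they would come from IntrusionRecoveryPomdpUtil.observation_tensor and IntrusionRecoveryPomdpUtil.transition_tensor.

# Minimal usage sketch of the belief update with the reformatted signatures.
# The observation space and tensor values are illustrative assumptions,
# not taken from this commit.
from csle_tolerance.util.intrusion_recovery_pomdp_util import IntrusionRecoveryPomdpUtil

states = IntrusionRecoveryPomdpUtil.state_space()   # [0, 1, 2]
observations = [0, 1]                               # assumed two-observation example
b = IntrusionRecoveryPomdpUtil.initial_belief()     # the p_a argument is no longer needed

# Illustrative |S|x|O| observation probabilities and |A|x|S|x|S| transition
# probabilities (two actions, three states); rows sum to one.
observation_tensor = [[0.8, 0.2],
                      [0.3, 0.7],
                      [0.5, 0.5]]
transition_tensor = [
    [[0.9, 0.1, 0.0], [0.0, 0.9, 0.1], [0.0, 0.0, 1.0]],  # action 0
    [[1.0, 0.0, 0.0], [1.0, 0.0, 0.0], [0.0, 0.0, 1.0]],  # action 1
]

# Belief update after taking action a=0 and observing o=1; next_belief asserts
# that the returned distribution sums to one.
b_prime = IntrusionRecoveryPomdpUtil.next_belief(
    o=1, a=0, b=b, states=states, observations=observations,
    observation_tensor=observation_tensor, transition_tensor=transition_tensor)
print(b_prime)

Since initial_belief no longer depends on p_a, the same call works for every attack probability, which is what lets the example scripts and configs above drop the p_a argument.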

simulation-system/libs/csle-tolerance/tests/test_intrusion_recovery_game_config.py (+1 -1)

@@ -223,7 +223,7 @@ def test_from_json_file(self, mocker: pytest_mock.MockFixture) -> None:
 cost_tensor=cost_tensor,
 observation_tensor=observation_tensor,
 transition_tensor=transition_tensor,
-b1=IntrusionRecoveryPomdpUtil.initial_belief(p_a=p_a),
+b1=IntrusionRecoveryPomdpUtil.initial_belief(),
 T=int(BTR),
 simulation_env_name=simulation_name,
 gym_env_name="csle-tolerance-intrusion-recovery-pomdp-v1",

simulation-system/libs/csle-tolerance/tests/test_intrusion_recovery_pomdp_config.py (+1 -1)

@@ -244,7 +244,7 @@ def test_from_json_file(self, mocker: pytest_mock.MockFixture) -> None:
 cost_tensor=cost_tensor,
 observation_tensor=observation_tensor,
 transition_tensor=transition_tensor,
-b1=IntrusionRecoveryPomdpUtil.initial_belief(p_a=p_a),
+b1=IntrusionRecoveryPomdpUtil.initial_belief(),
 T=BTR,
 simulation_env_name=simulation_name,
 gym_env_name="csle-tolerance-intrusion-recovery-pomdp-v1",

simulation-system/libs/csle-tolerance/tests/test_intrusion_recovery_pomdp_util.py (+1 -1)

@@ -30,7 +30,7 @@ def test_initial_belief(self) -> None:

 :return: None
 """
-assert sum(IntrusionRecoveryPomdpUtil.initial_belief(p_a=0.5)) == 1
+assert sum(IntrusionRecoveryPomdpUtil.initial_belief()) == 1

 def test_action_space(self) -> None:
 """
