@@ -24,7 +24,7 @@ def state_space() -> List[int]:
         return [0, 1, 2]

     @staticmethod
-    def initial_belief(p_a: float) -> List[float]:
+    def initial_belief() -> List[float]:
         """
         Gets the initial belief state of the POMDP
@@ -72,9 +72,7 @@ def cost_function(s: int, a: int, eta: float, negate: bool = False) -> float:
         return cost

     @staticmethod
-    def cost_tensor(
-        eta: float, states: List[int], actions: List[int], negate: bool = False
-    ) -> List[List[float]]:
+    def cost_tensor(eta: float, states: List[int], actions: List[int], negate: bool = False) -> List[List[float]]:
         """
         Creates a |A|x|S| tensor with the costs (or rewards) of the POMDP
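The |A|x|S| layout described above suggests the tensor is indexed by action first and state second. Below is a minimal sketch of how such a tensor could be assembled from the cost_function shown in the hunk header; the loop order, the two-action set, and the eta value are illustrative assumptions, not taken from this commit, and IntrusionRecoveryPomdpUtil (the class defined in this file) is assumed to be in scope.

```python
# Hypothetical sketch: build an |A|x|S| cost tensor indexed as cost_tensor[a][s],
# using the cost_function(s, a, eta, negate) signature from the hunk header above.
states = [0, 1, 2]   # state space from this file
actions = [0, 1]     # assumed two-action set (illustration only)
eta = 2.0            # placeholder cost parameter
cost_tensor = [
    [IntrusionRecoveryPomdpUtil.cost_function(s=s, a=a, eta=eta, negate=False) for s in states]
    for a in actions
]
assert len(cost_tensor) == len(actions) and len(cost_tensor[0]) == len(states)
```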
@@ -119,9 +117,7 @@ def observation_function(s: int, o: int, num_observations: int) -> float:
         return 0.0

     @staticmethod
-    def observation_tensor(
-        states: List[int], observations: List[int]
-    ) -> List[List[float]]:
+    def observation_tensor(states: List[int], observations: List[int]) -> List[List[float]]:
         """
         Creates a |S|x|O| tensor with the observation probabilities
@@ -143,9 +139,7 @@ def observation_tensor(
         return observation_tensor

     @staticmethod
-    def transition_function(
-        s: int, s_prime: int, a: int, p_a: float, p_c_1: float, p_u: float, p_c_2: float
-    ) -> float:
+    def transition_function(s: int, s_prime: int, a: int, p_a: float, p_c_1: float, p_u: float, p_c_2: float) -> float:
         """
         The transition function of the POMDP
@@ -184,9 +178,7 @@ def transition_function(
         return 0

     @staticmethod
-    def transition_function_game(
-        s: int, s_prime: int, a1: int, a2: int, p_a: float, p_c_1: float
-    ) -> float:
+    def transition_function_game(s: int, s_prime: int, a1: int, a2: int, p_a: float, p_c_1: float) -> float:
         """
         The transition function of the POSG
@@ -216,14 +208,8 @@ def transition_function_game(
         return 0

     @staticmethod
-    def transition_tensor(
-        states: List[int],
-        actions: List[int],
-        p_a: float,
-        p_c_1: float,
-        p_c_2: float,
-        p_u: float,
-    ) -> List[List[List[float]]]:
+    def transition_tensor(states: List[int], actions: List[int], p_a: float, p_c_1: float, p_c_2: float, p_u: float) \
+            -> List[List[List[float]]]:
         """
         Creates a |A|x|S|x|S| tensor with the transition probabilities of the POMDP
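Given the |A|x|S|x|S| layout, each transition_tensor[a][s] row should be a probability distribution over next states. The sketch below shows how such a tensor could be assembled from transition_function and sanity-checked; the loop order, indexing convention, action set, and parameter values are illustrative assumptions, with IntrusionRecoveryPomdpUtil again assumed to be in scope.

```python
# Hypothetical sketch: assemble an |A|x|S|x|S| tensor indexed as T[a][s][s_prime]
# from transition_function(s, s_prime, a, p_a, p_c_1, p_u, p_c_2); parameter values are placeholders.
states = [0, 1, 2]
actions = [0, 1]
T = [[[IntrusionRecoveryPomdpUtil.transition_function(
          s=s, s_prime=s_prime, a=a, p_a=0.1, p_c_1=0.01, p_u=0.02, p_c_2=0.01)
       for s_prime in states]
      for s in states]
     for a in actions]
for a in actions:
    for s in states:
        # each row is expected to be a probability distribution over next states
        assert round(sum(T[a][s]), 2) == 1
```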
@@ -258,13 +244,8 @@ def transition_tensor(
         return transition_tensor

     @staticmethod
-    def transition_tensor_game(
-        states: List[int],
-        defender_actions: List[int],
-        attacker_actions: List[int],
-        p_a: float,
-        p_c_1: float,
-    ) -> List[List[List[List[float]]]]:
+    def transition_tensor_game(states: List[int], defender_actions: List[int], attacker_actions: List[int], p_a: float,
+                               p_c_1: float) -> List[List[List[List[float]]]]:
         """
         Creates a |A|x|A|x|S|x|S| tensor with the transition probabilities of the POSG
@@ -304,9 +285,7 @@ def sample_initial_state(b1: List[float]) -> int:
         return int(np.random.choice(np.arange(0, len(b1)), p=b1))

     @staticmethod
-    def sample_next_observation(
-        observation_tensor: List[List[float]], s_prime: int, observations: List[int]
-    ) -> int:
+    def sample_next_observation(observation_tensor: List[List[float]], s_prime: int, observations: List[int]) -> int:
         """
         Samples the next observation
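sample_initial_state above draws from a belief vector with np.random.choice. Given the |S|x|O| layout of the observation tensor, sampling the next observation can follow the same pattern by treating observation_tensor[s_prime] as the distribution over observations; the sketch below is an assumption about the indexing convention, not the function's actual body.

```python
import numpy as np

# Hypothetical sketch mirroring sample_initial_state: treat observation_tensor[s_prime]
# (a row of the |S|x|O| tensor) as the distribution over observations and sample from it.
def sample_next_observation_sketch(observation_tensor, s_prime, observations):
    probs = observation_tensor[s_prime]
    return int(np.random.choice(np.arange(0, len(observations)), p=probs))
```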
@@ -322,9 +301,7 @@ def sample_next_observation(
         return int(o)

     @staticmethod
-    def sample_next_state_game(
-        transition_tensor: List[List[List[List[float]]]], s: int, a1: int, a2: int
-    ) -> int:
+    def sample_next_state_game(transition_tensor: List[List[List[List[float]]]], s: int, a1: int, a2: int) -> int:
         """
         Samples the next observation
@@ -341,16 +318,8 @@ def sample_next_state_game(
         return int(s_prime)

     @staticmethod
-    def bayes_filter(
-        s_prime: int,
-        o: int,
-        a: int,
-        b: List[float],
-        states: List[int],
-        observations: List[int],
-        observation_tensor: List[List[float]],
-        transition_tensor: List[List[List[float]]],
-    ) -> float:
+    def bayes_filter(s_prime: int, o: int, a: int, b: List[float], states: List[int], observations: List[int],
+                     observation_tensor: List[List[float]], transition_tensor: List[List[List[float]]]) -> float:
         """
         A Bayesian filter to compute b[s_prime] of the POMDP
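For reference, the quantity b[s_prime] that a Bayes filter computes is the textbook POMDP belief update: the probability of landing in s_prime after taking action a and observing o, normalized over all next states. The sketch below states that formula using the |S|x|O| and |A|x|S|x|S| tensor layouts named in this file; it is the standard formula, not necessarily line-for-line what bayes_filter does.

```python
# Hypothetical sketch of the standard POMDP belief update:
#   b'(s') = O[s'][o] * sum_s T[a][s][s'] * b[s]  /  sum_{s''} O[s''][o] * sum_s T[a][s][s''] * b[s]
# Tensor layouts follow the |S|x|O| and |A|x|S|x|S| conventions stated in this file.
def bayes_filter_sketch(s_prime, o, a, b, states, observation_tensor, transition_tensor):
    numerator = observation_tensor[s_prime][o] * sum(
        transition_tensor[a][s][s_prime] * b[s] for s in states)
    denominator = sum(
        observation_tensor[s2][o] * sum(transition_tensor[a][s][s2] * b[s] for s in states)
        for s2 in states)
    return numerator / denominator
```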
@@ -386,14 +355,8 @@ def bayes_filter(
         return b_prime_s_prime

     @staticmethod
-    def p_o_given_b_a1_a2(
-        o: int,
-        b: List[float],
-        a: int,
-        states: List[int],
-        transition_tensor: List[List[List[float]]],
-        observation_tensor: List[List[float]],
-    ) -> float:
+    def p_o_given_b_a1_a2(o: int, b: List[float], a: int, states: List[int], transition_tensor: List[List[List[float]]],
+                          observation_tensor: List[List[float]]) -> float:
         """
         Computes P[o|a,b] of the POMDP
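P[o|a,b] is the normalizer in the belief update above: the probability of observing o after taking action a from belief b, marginalized over current and next states. A compact sketch under the same tensor-layout assumptions:

```python
# Hypothetical sketch: P[o | a, b] = sum_{s'} O[s'][o] * sum_s T[a][s][s'] * b[s]
# (the denominator of the Bayes-filter sketch above); tensor layouts are assumptions.
def p_o_given_b_a_sketch(o, b, a, states, transition_tensor, observation_tensor):
    return sum(observation_tensor[s_prime][o] * transition_tensor[a][s][s_prime] * b[s]
               for s in states for s_prime in states)
```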
@@ -417,15 +380,8 @@ def p_o_given_b_a1_a2(
         return prob

     @staticmethod
-    def next_belief(
-        o: int,
-        a: int,
-        b: List[float],
-        states: List[int],
-        observations: List[int],
-        observation_tensor: List[List[float]],
-        transition_tensor: List[List[List[float]]],
-    ) -> List[float]:
+    def next_belief(o: int, a: int, b: List[float], states: List[int], observations: List[int],
+                    observation_tensor: List[List[float]], transition_tensor: List[List[List[float]]]) -> List[float]:
         """
         Computes the next belief using a Bayesian filter
@@ -441,15 +397,8 @@ def next_belief(
         b_prime = [0.0] * len(states)
         for s_prime in states:
             b_prime[s_prime] = IntrusionRecoveryPomdpUtil.bayes_filter(
-                s_prime=s_prime,
-                o=o,
-                a=a,
-                b=b,
-                states=states,
-                observations=observations,
-                transition_tensor=transition_tensor,
-                observation_tensor=observation_tensor,
-            )
+                s_prime=s_prime, o=o, a=a, b=b, states=states, observations=observations,
+                transition_tensor=transition_tensor, observation_tensor=observation_tensor)
         if round(sum(b_prime), 2) != 1:
             print(f"error, b_prime:{b_prime}, o:{o}, a:{a}, b:{b}")
         assert round(sum(b_prime), 2) == 1
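To make the belief-update flow concrete, here is a hedged usage sketch with made-up 3-state tensors; the numbers are placeholders whose only requirement is that every row of the transition and observation tensors is a probability distribution, and IntrusionRecoveryPomdpUtil is again assumed to be in scope.

```python
# Hypothetical usage sketch with placeholder tensors (3 states, 2 actions, 2 observations).
states = [0, 1, 2]
observations = [0, 1]
O = [[0.9, 0.1], [0.3, 0.7], [0.5, 0.5]]                   # |S| x |O|
T = [[[0.8, 0.1, 0.1], [0.0, 0.9, 0.1], [0.0, 0.0, 1.0]],  # |A| x |S| x |S|
     [[1.0, 0.0, 0.0], [0.7, 0.2, 0.1], [0.0, 0.0, 1.0]]]
b = [1.0, 0.0, 0.0]
b_prime = IntrusionRecoveryPomdpUtil.next_belief(
    o=1, a=0, b=b, states=states, observations=observations,
    observation_tensor=O, transition_tensor=T)
assert round(sum(b_prime), 2) == 1  # same normalization check as in the diff above
```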