@@ -390,7 +390,7 @@ def pomdp_solver_file(config: StoppingGameConfig, discount_factor: float, pi2: n
         return file_str

     @staticmethod
-    def reduce_T_attacker(T: npt.NDArray[np.float_], strategy: Policy) -> npt.NDArray[np.float_]:
+    def reduce_T_attacker(T: npt.NDArray[np.float64], strategy: Policy) -> npt.NDArray[np.float64]:
         """
         Reduces the transition tensor based on a given attacker strategy
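Every hunk in this diff makes the same mechanical change: the `np.float_` alias, removed in NumPy 2.0, is replaced by its drop-in equivalent `np.float64` in the type annotations; no runtime behavior changes. The reduction that `reduce_T_attacker` performs is marginalizing the attacker action out of the transition tensor under the given strategy. A minimal sketch of that operation, assuming `T` is indexed as `T[a1][a2][s][s_prime]` and that `Policy` exposes a `probability(state, action)` method (both assumptions, not confirmed by this diff):

```python
import numpy as np
import numpy.typing as npt

def reduce_T_attacker_sketch(T: npt.NDArray[np.float64], strategy) -> npt.NDArray[np.float64]:
    """Marginalize the attacker action a2 out of T[a1][a2][s][s_prime].

    Illustrative only; assumes strategy.probability(s, a2) returns the
    attacker's probability of playing a2 in state s.
    """
    num_a1, num_a2, num_s, _ = T.shape
    reduced_T = np.zeros((num_a1, num_s, num_s))
    for a1 in range(num_a1):
        for s in range(num_s):
            for s_prime in range(num_s):
                reduced_T[a1][s][s_prime] = sum(
                    strategy.probability(s, a2) * T[a1][a2][s][s_prime]
                    for a2 in range(num_a2))
    return reduced_T
```

`reduce_R_attacker` and `reduce_Z_attacker` below presumably apply the same marginalization to the reward and observation tensors, and the `*_defender` variants marginalize the defender action `a1` instead, so one sketch covers the whole family.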
@@ -415,7 +415,7 @@ def reduce_T_attacker(T: npt.NDArray[np.float_], strategy: Policy) -> npt.NDArra
         return reduced_T

     @staticmethod
-    def reduce_R_attacker(R: npt.NDArray[np.float_], strategy: Policy) -> npt.NDArray[np.float_]:
+    def reduce_R_attacker(R: npt.NDArray[np.float64], strategy: Policy) -> npt.NDArray[np.float64]:
         """
         Reduces the reward tensor based on a given attacker strategy
@@ -433,7 +433,7 @@ def reduce_R_attacker(R: npt.NDArray[np.float_], strategy: Policy) -> npt.NDArra
         return reduced_R

     @staticmethod
-    def reduce_Z_attacker(Z: npt.NDArray[np.float_], strategy: Policy) -> npt.NDArray[np.float_]:
+    def reduce_Z_attacker(Z: npt.NDArray[np.float64], strategy: Policy) -> npt.NDArray[np.float64]:
         """
         Reduces the observation tensor based on a given attacker strategy
@@ -450,7 +450,7 @@ def reduce_Z_attacker(Z: npt.NDArray[np.float_], strategy: Policy) -> npt.NDArra
         return reduced_Z

     @staticmethod
-    def reduce_T_defender(T: npt.NDArray[np.float_], strategy: Policy) -> npt.NDArray[np.float_]:
+    def reduce_T_defender(T: npt.NDArray[np.float64], strategy: Policy) -> npt.NDArray[np.float64]:
         """
         Reduces the transition tensor based on a given defender strategy
@@ -469,7 +469,7 @@ def reduce_T_defender(T: npt.NDArray[np.float_], strategy: Policy) -> npt.NDArra
         return reduced_T

     @staticmethod
-    def reduce_R_defender(R: npt.NDArray[np.float_], strategy: Policy) -> npt.NDArray[np.float_]:
+    def reduce_R_defender(R: npt.NDArray[np.float64], strategy: Policy) -> npt.NDArray[np.float64]:
         """
         Reduces the reward tensor based on a given defender strategy
@@ -487,10 +487,10 @@ def reduce_R_defender(R: npt.NDArray[np.float_], strategy: Policy) -> npt.NDArra
         return reduced_R

     @staticmethod
-    def aggregate_belief_mdp_defender(aggregation_resolution: int, T: npt.NDArray[np.float_],
-                                      R: npt.NDArray[np.float_], Z: npt.NDArray[np.float_],
+    def aggregate_belief_mdp_defender(aggregation_resolution: int, T: npt.NDArray[np.float64],
+                                      R: npt.NDArray[np.float64], Z: npt.NDArray[np.float64],
                                       S: npt.NDArray[np.int_], A: npt.NDArray[np.int_], O: npt.NDArray[np.int_]) \
-            -> Tuple[npt.NDArray[np.float_], npt.NDArray[np.int_], npt.NDArray[np.float_], npt.NDArray[np.float_]]:
+            -> Tuple[npt.NDArray[np.float64], npt.NDArray[np.int_], npt.NDArray[np.float64], npt.NDArray[np.float64]]:
         """
         Generates an aggregate belief MDP from a given POMDP specification and aggregation resolution
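Judging from this signature and the `return aggregate_belief_space, A, belief_T, belief_R` context line in the next hunk, `aggregate_belief_mdp_defender` plausibly composes the three helpers changed below: discretize the belief simplex, then build reward and transition models over the grid. A sketch of that composition (call sites and keyword names are inferred from the signatures in this diff, and the helper bodies are sketched after their own hunks further down):

```python
def aggregate_belief_mdp_defender_sketch(aggregation_resolution, T, R, Z, S, A, O):
    # Discretize the belief simplex at the requested resolution
    aggregate_belief_space = generate_aggregate_belief_space_sketch(
        n=aggregation_resolution, belief_space_dimension=len(S))
    # Expected immediate rewards at each grid belief
    belief_R = generate_aggregate_belief_reward_tensor_sketch(
        aggregate_belief_space=aggregate_belief_space, S=S, A=A, R=R)
    # Transition probabilities between grid beliefs
    belief_T = generate_aggregate_belief_transition_operator_sketch(
        aggregate_belief_space=aggregate_belief_space, S=S, A=A, O=O, T=T, Z=Z)
    return aggregate_belief_space, A, belief_T, belief_R
```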
@@ -512,7 +512,7 @@ def aggregate_belief_mdp_defender(aggregation_resolution: int, T: npt.NDArray[np
         return aggregate_belief_space, A, belief_T, belief_R

     @staticmethod
-    def generate_aggregate_belief_space(n: int, belief_space_dimension: int) -> npt.NDArray[np.float_]:
+    def generate_aggregate_belief_space(n: int, belief_space_dimension: int) -> npt.NDArray[np.float64]:
         """
         Generate an aggregate belief space B_n.
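The docstring names B_n without defining it here. The standard construction in belief aggregation (an assumption on my part, not confirmed by this diff) is the grid of beliefs whose coordinates are integer multiples of 1/n:

```python
import itertools
import numpy as np

def generate_aggregate_belief_space_sketch(n: int, belief_space_dimension: int) -> np.ndarray:
    """Enumerate the grid B_n = {b : b_i = k_i / n, sum_i k_i = n} on the simplex.

    Illustrative only; the repository's enumeration may differ.
    """
    points = []
    # Every composition (k_1, ..., k_d) of n into d non-negative parts
    # yields one grid belief k / n.
    for ks in itertools.product(range(n + 1), repeat=belief_space_dimension):
        if sum(ks) == n:
            points.append(np.array(ks, dtype=np.float64) / n)
    return np.array(points)
```

For example, `n=2` in a 2-state POMDP yields the grid `[0, 1]`, `[0.5, 0.5]`, `[1, 0]`.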
@@ -534,8 +534,8 @@ def generate_aggregate_belief_space(n: int, belief_space_dimension: int) -> npt.

     @staticmethod
     def generate_aggregate_belief_reward_tensor(
-            aggregate_belief_space: npt.NDArray[np.float_], S: npt.NDArray[np.int_], A: npt.NDArray[np.int_],
-            R: npt.NDArray[np.float_]) -> npt.NDArray[np.float_]:
+            aggregate_belief_space: npt.NDArray[np.float64], S: npt.NDArray[np.int_], A: npt.NDArray[np.int_],
+            R: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
         """
         Generates an aggregate reward tensor for the aggregate belief MDP
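The aggregate reward is presumably the belief-weighted average of the state rewards. A sketch, assuming `R` is indexed `R[a][s]` (an assumption):

```python
import numpy as np

def generate_aggregate_belief_reward_tensor_sketch(
        aggregate_belief_space: np.ndarray, S: np.ndarray, A: np.ndarray,
        R: np.ndarray) -> np.ndarray:
    """belief_R[a][i] = sum_s b_i[s] * R[a][s] for each grid belief b_i."""
    belief_R = np.zeros((len(A), len(aggregate_belief_space)))
    for a in A:
        for i, b in enumerate(aggregate_belief_space):
            belief_R[a][i] = sum(b[s] * R[a][s] for s in S)
    return belief_R
```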
@@ -557,8 +557,8 @@ def generate_aggregate_belief_reward_tensor(

     @staticmethod
     def generate_aggregate_belief_transition_operator(
-            aggregate_belief_space: npt.NDArray[np.float_], S: npt.NDArray[np.int_], A: npt.NDArray[np.int_],
-            O: npt.NDArray[np.int_], T: npt.NDArray[np.float_], Z: npt.NDArray[np.float_]) -> npt.NDArray[np.float_]:
+            aggregate_belief_space: npt.NDArray[np.float64], S: npt.NDArray[np.int_], A: npt.NDArray[np.int_],
+            O: npt.NDArray[np.int_], T: npt.NDArray[np.float64], Z: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
         """
         Generates an aggregate belief space transition operator
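A plausible realization of the operator fills a tensor `belief_T[a][i][j]` with the probability of moving from grid belief `i` to grid belief `j` under action `a` (that indexing is an assumption), delegating the pairwise probability to the method changed in the next hunk, sketched below:

```python
import numpy as np

def generate_aggregate_belief_transition_operator_sketch(
        aggregate_belief_space, S, A, O, T, Z):
    """Fill belief_T[a][i][j] with pairwise grid-belief transition probabilities."""
    n_b = len(aggregate_belief_space)
    belief_T = np.zeros((len(A), n_b, n_b))
    for a in A:
        for i, b1 in enumerate(aggregate_belief_space):
            for j, b2 in enumerate(aggregate_belief_space):
                belief_T[a][i][j] = aggregate_belief_transition_probability_sketch(
                    b1=b1, b2=b2, a=a, S=S, O=O, T=T, Z=Z,
                    aggregate_belief_space=aggregate_belief_space)
    return belief_T
```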
@@ -581,11 +581,11 @@ def generate_aggregate_belief_transition_operator(
         return belief_T

     @staticmethod
-    def aggregate_belief_transition_probability(b1: npt.NDArray[np.float_], b2: npt.NDArray[np.float_], a: int,
+    def aggregate_belief_transition_probability(b1: npt.NDArray[np.float64], b2: npt.NDArray[np.float64], a: int,
                                                 S: npt.NDArray[np.int_], O: npt.NDArray[np.int_],
                                                 A: npt.NDArray[np.int_],
-                                                T: npt.NDArray[np.float_], Z: npt.NDArray[np.float_],
-                                                aggregate_belief_space: npt.NDArray[np.float_]) -> float:
+                                                T: npt.NDArray[np.float64], Z: npt.NDArray[np.float64],
+                                                aggregate_belief_space: npt.NDArray[np.float64]) -> float:
         """
         Calculates the probability of transitioning from belief b1 to belief b2 when taking action a
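In the usual aggregation scheme, P(b2 | b1, a) is the total probability of the observations whose Bayesian belief update from b1, snapped to its nearest grid point, lands on b2. A sketch under the same indexing assumptions as above, using the next-belief and nearest-neighbor helpers changed later in this diff (sketched after their hunks):

```python
import numpy as np

def aggregate_belief_transition_probability_sketch(
        b1, b2, a, S, O, T, Z, aggregate_belief_space):
    """P(b2 | b1, a) for grid beliefs; assumes T[a][s][s_prime], Z[a][s_prime][o]."""
    prob = 0.0
    for o in O:
        # Probability of observing o after playing a in belief b1
        p_o = sum(b1[s] * T[a][s][s_prime] * Z[a][s_prime][o]
                  for s in S for s_prime in S)
        if p_o == 0:
            continue
        b_next = pomdp_next_belief_sketch(o, a, b1, S, Z, T)
        nearest = find_nearest_neighbor_belief_sketch(aggregate_belief_space, b_next)
        if np.allclose(nearest, b2):
            prob += p_o
    return prob
```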
@@ -616,8 +616,8 @@ def aggregate_belief_transition_probability(b1: npt.NDArray[np.float_], b2: npt.

     @staticmethod
     def pomdp_next_belief(o: int, a: int, b: npt.NDArray[np.float64], states: npt.NDArray[np.int_],
-                          observations: npt.NDArray[np.int_], observation_tensor: npt.NDArray[np.float_],
-                          transition_tensor: npt.NDArray[np.float_]) \
+                          observations: npt.NDArray[np.int_], observation_tensor: npt.NDArray[np.float64],
+                          transition_tensor: npt.NDArray[np.float64]) \
            -> npt.NDArray[np.float64]:
         """
         Computes the next belief of the POMDP using a Bayesian filter
@@ -643,8 +643,8 @@ def pomdp_next_belief(o: int, a: int, b: npt.NDArray[np.float64], states: npt.ND

     @staticmethod
     def pomdp_bayes_filter(s_prime: int, o: int, a: int, b: npt.NDArray[np.float64], states: npt.NDArray[np.int_],
-                           observations: npt.NDArray[np.int_], observation_tensor: npt.NDArray[np.float_],
-                           transition_tensor: npt.NDArray[np.float_]) -> float:
+                           observations: npt.NDArray[np.int_], observation_tensor: npt.NDArray[np.float64],
+                           transition_tensor: npt.NDArray[np.float64]) -> float:
         """
         A Bayesian filter to compute b[s_prime] of the POMDP
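These two methods implement the standard POMDP belief update b'(s') ∝ Z[a][s'][o] · Σ_s T[a][s][s'] · b(s). A sketch under the indexing assumptions used above (the repository's signatures also take an `observations` array, omitted here since the update itself doesn't need it):

```python
import numpy as np

def pomdp_bayes_filter_sketch(s_prime: int, o: int, a: int, b: np.ndarray,
                              states: np.ndarray, observation_tensor: np.ndarray,
                              transition_tensor: np.ndarray) -> float:
    """b'(s') = Z[a][s'][o] * sum_s T[a][s][s'] * b[s], normalized over s'."""
    numerator = observation_tensor[a][s_prime][o] * sum(
        transition_tensor[a][s][s_prime] * b[s] for s in states)
    denominator = sum(
        observation_tensor[a][sp][o] * transition_tensor[a][s][sp] * b[s]
        for sp in states for s in states)
    return numerator / denominator if denominator > 0 else 0.0

def pomdp_next_belief_sketch(o: int, a: int, b: np.ndarray, states: np.ndarray,
                             observation_tensor: np.ndarray,
                             transition_tensor: np.ndarray) -> np.ndarray:
    """Apply the filter componentwise to obtain the full next belief."""
    return np.array([pomdp_bayes_filter_sketch(s_prime, o, a, b, states,
                                               observation_tensor,
                                               transition_tensor)
                     for s_prime in states])
```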
@@ -679,8 +679,8 @@ def pomdp_bayes_filter(s_prime: int, o: int, a: int, b: npt.NDArray[np.float64],
         return b_prime_s_prime

     @staticmethod
-    def find_nearest_neighbor_belief(belief_space: npt.NDArray[np.float_], target_belief: npt.NDArray[np.float_]) \
-            -> npt.NDArray[np.float_]:
+    def find_nearest_neighbor_belief(belief_space: npt.NDArray[np.float64], target_belief: npt.NDArray[np.float64]) \
+            -> npt.NDArray[np.float64]:
         """
         Finds the nearest neighbor (in the Euclidean sense) of a given belief in a certain belief space
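The nearest-neighbor lookup the docstring describes is a plain Euclidean argmin over the grid; a sketch:

```python
import numpy as np

def find_nearest_neighbor_belief_sketch(belief_space: np.ndarray,
                                        target_belief: np.ndarray) -> np.ndarray:
    """Return the grid belief minimizing the Euclidean distance to target_belief."""
    distances = np.linalg.norm(belief_space - target_belief, axis=1)
    return belief_space[int(np.argmin(distances))]
```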