# utils.py
import random

import numpy as np
import scipy.stats
import torch


def set_random_seeds(seed=0):
    """Set random seeds across libraries for reproducibility.

    Parameters
    ----------
    seed : int, optional (default: 0)
        The seed value for pseudorandom number generation.
    """
    seed = abs(seed)  # NumPy seeds must be non-negative.
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.cuda.manual_seed(seed)
    # Trade some speed for deterministic cuDNN kernels.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
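
# Usage sketch (the seed value below is arbitrary, not prescribed by this module):
#
#     set_random_seeds(42)
#     torch.randn(3)   # now reproducible across runs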


def kde(data):
    """Compute the kernel density estimation (KDE) for a given dataset.

    Parameters
    ----------
    data : np.ndarray
        Input data of shape (n, d), where n is the number of samples and d is the
        dimensionality.

    Returns
    -------
    scipy.stats.gaussian_kde
        A KDE object that has been fit to the input data.
    """
    # gaussian_kde expects data of shape (d, n), hence the transpose.
    return scipy.stats.gaussian_kde(data.T)
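
# Usage sketch (the sample array below is an illustrative assumption):
#
#     samples = np.random.randn(500, 2)   # 500 samples in 2 dimensions
#     density = kde(samples)
#     density.pdf(samples.T)              # gaussian_kde evaluates points of shape (d, n)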


def kl_divergence(active_kde, quiescent_kde, n=2500):
    """Compute a Monte Carlo estimate of the Kullback-Leibler (KL) divergence.

    Computes the KL divergence between two kernel density estimations (KDEs), or between
    a KDE and a continuous distribution, using a Monte Carlo estimate.

    Parameters
    ----------
    active_kde : scipy.stats.gaussian_kde
        A KDE object representing the active distribution.
    quiescent_kde : scipy.stats.gaussian_kde or scipy.stats.rv_continuous
        A KDE object or frozen continuous distribution representing the quiescent
        distribution.
    n : int, optional (default: 2500)
        The number of points to sample for computing the KL divergence.

    Returns
    -------
    np.float64
        The KL divergence between the quiescent and active distributions.

    Notes
    -----
    1. The KL divergence is estimated using a Monte Carlo method:
       KL(P || Q) ≈ (1/n) * Σ[log(P(x_i) / Q(x_i))] for i = 1 to n,
       where P(x_i) and Q(x_i) are the probability densities of the quiescent and active
       distributions respectively, and x_i are points sampled from the quiescent
       distribution P.
    2. The KL divergence is undefined if the support of P(x) is not a subset of the
       support of Q(x). The density ratio is clipped to [1e-7, 1e7] to keep the estimate
       finite in such cases.
    3. It is assumed that all KDE objects are properly fitted.
    """
    if isinstance(quiescent_kde, scipy.stats.gaussian_kde):
        # gaussian_kde.resample returns samples of shape (d, n).
        points = quiescent_kde.resample(n)
    elif isinstance(getattr(quiescent_kde, "dist", None), scipy.stats.rv_continuous):
        # Frozen continuous distribution: draw (n, d) samples and transpose to (d, n).
        points = quiescent_kde.rvs((n, active_kde.d)).T
    else:
        raise NotImplementedError("unknown distribution type.")
    quiescent_pdf = quiescent_kde.pdf(points)
    active_pdf = active_kde.pdf(points)
    # Clip the density ratio to avoid log(0) and division blow-ups.
    return np.log((quiescent_pdf / active_pdf).clip(1e-7, 1e7)).mean()
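
# Usage sketch (a minimal example with two synthetic 2-D datasets; the means and
# sample counts are illustrative assumptions, not values used by this module):
#
#     active = kde(np.random.normal(0.0, 1.0, size=(500, 2)))
#     quiescent = kde(np.random.normal(0.5, 1.0, size=(500, 2)))
#     kl_divergence(active, quiescent)
#
# A frozen scipy.stats continuous distribution (e.g. scipy.stats.norm(0.5, 1.0)) is
# also accepted for the quiescent argument.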


def output_variance(data, shift=1000):
    """Compute the variance summed over dimensions for output trajectories.

    Parameters
    ----------
    data : np.ndarray
        A batch of output trajectories of shape (batch, time, dim) for which to compute
        the metric.
    shift : int, optional (default: 1000)
        The number of initial timesteps to ignore.

    Returns
    -------
    np.float64
        The variance summed over dimensions for output trajectories, averaged across the
        batch.
    """
    b, _, d = data.shape
    # Covariance of each trajectory after discarding the first `shift` timesteps.
    covs = np.array([np.cov(data[i, shift:].T) for i in range(b)])
    if d > 1:
        # Sum the per-dimension variances via the trace of each covariance matrix.
        covs = np.trace(covs, axis1=1, axis2=2)
    return covs.mean()
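
# Usage sketch (the shapes and lowered `shift` are illustrative assumptions):
#
#     trajectories = np.random.randn(8, 2000, 3)   # (batch, time, dim)
#     output_variance(trajectories, shift=500)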


def stepwise_distance(data):
    """Compute the average point-to-point distance for output trajectories.

    Parameters
    ----------
    data : np.ndarray
        A batch of output trajectories of shape (batch, time, dim) for which to compute
        the metric.

    Returns
    -------
    np.ndarray
        The average point-to-point distance for each output trajectory.
    """
    # Euclidean distance between consecutive timesteps, averaged over time.
    return ((np.diff(data, axis=1) ** 2).sum(axis=-1) ** 0.5).mean(axis=-1)
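
# Usage sketch (shapes are illustrative assumptions, not requirements of this module):
#
#     trajectories = np.random.randn(4, 1000, 2)   # (batch, time, dim)
#     stepwise_distance(trajectories)               # one value per trajectory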