Skip to content

Commit f815a23

Browse files
Circle CICircle CI
Circle CI
authored and
Circle CI
committed
CircleCI update of dev docs (2851).
1 parent cc91ab0 commit f815a23

File tree

358 files changed

+739925
-737291
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

358 files changed

+739925
-737291
lines changed
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
# -*- coding: utf-8 -*-
2+
r"""
3+
======================================================================================================================================
4+
Detecting outliers by learning sample marginal distribution with CO-Optimal Transport and by using unbalanced Co-Optimal Transport
5+
======================================================================================================================================
6+
7+
In this example, we consider two point clouds living in different Euclidean spaces, where the outliers
8+
are artificially injected into the target data. We illustrate two methods which allow us to filter out
9+
these outliers.
10+
11+
The first method requires learning the sample marginal distribution which minimizes
12+
the CO-Optimal Transport distance [49] between two input spaces.
13+
More precisely, given a source data
14+
:math:`(X, \mu_x^{(s)}, \mu_x^{(f)})` and a target matrix :math:`Y` associated with a fixed
15+
histogram on features :math:`\mu_y^{(f)}`, we want to solve the following problem
16+
17+
.. math::
18+
\min_{\mu_y^{(s)} \in \Delta} \text{COOT}\left( (X, \mu_x^{(s)}, \mu_x^{(f)}), (Y, \mu_y^{(s)}, \mu_y^{(f)}) \right)
19+
20+
where :math:`\Delta` is the probability simplex. This minimization is done with a
21+
simple projected gradient descent in PyTorch. We use the automatic backend of POT that
22+
allows us to compute the CO-Optimal Transport distance with :func:`ot.coot.co_optimal_transport2`
23+
with differentiable losses.
24+
25+
The second method simply requires direct application of unbalanced Co-Optimal Transport [71].
26+
More precisely, it is enough to use the sample and feature coupling from solving
27+
28+
.. math::
29+
\text{UCOOT}\left( (X, \mu_x^{(s)}, \mu_x^{(f)}), (Y, \mu_y^{(s)}, \mu_y^{(f)}) \right)
30+
31+
where all the marginal distributions are uniform.
32+
33+
.. [49] Redko, I., Vayer, T., Flamary, R., and Courty, N. (2020).
34+
`CO-Optimal Transport <https://proceedings.neurips.cc/paper/2020/file/cc384c68ad503482fb24e6d1e3b512ae-Paper.pdf>`_.
35+
Advances in Neural Information Processing Systems, 33.
36+
.. [71] H. Tran, H. Janati, N. Courty, R. Flamary, I. Redko, P. Demetci & R. Singh (2023). `Unbalanced Co-Optimal Transport <https://dl.acm.org/doi/10.1609/aaai.v37i8.26193>`_.
37+
AAAI Conference on Artificial Intelligence.
38+
"""
39+
40+
# Author: Remi Flamary <[email protected]>
41+
# Quang Huy Tran <[email protected]>
42+
# License: MIT License
43+
44+
from matplotlib.patches import ConnectionPatch
45+
import torch
46+
import numpy as np
47+
48+
import matplotlib.pyplot as pl
49+
import ot
50+
51+
from ot.coot import co_optimal_transport as coot
52+
from ot.coot import co_optimal_transport2 as coot2
53+
from ot.gromov._unbalanced import unbalanced_co_optimal_transport
54+
55+
56+
# %%
57+
# Generate data
58+
# -------------
59+
# The source and clean target matrices are generated by
60+
# :math:`X_{i,j} = \cos(\frac{i}{n_1} \pi) + \cos(\frac{j}{d_1} \pi)` and
61+
# :math:`Y_{i,j} = \cos(\frac{i}{n_2} \pi) + \cos(\frac{j}{d_2} \pi)`.
62+
# The target matrix is then contaminated by adding 5 row outliers.
63+
# Intuitively, we expect that the estimated sample distribution should ignore these outliers,
64+
# i.e. their weights should be zero.
65+
66+
# Seed both generators: the outlier rows below are drawn with torch.randn,
# which uses PyTorch's RNG and ignores NumPy's seed, so seeding NumPy alone
# leaves the generated data (and every figure) non-reproducible.
np.random.seed(182)
torch.manual_seed(182)
67+
68+
n1, d1 = 20, 16
69+
n2, d2 = 10, 8
70+
n = 15
71+
72+
X = (
73+
torch.cos(torch.arange(n1) * torch.pi / n1)[:, None] +
74+
torch.cos(torch.arange(d1) * torch.pi / d1)[None, :]
75+
)
76+
77+
# Generate clean target data mixed with outliers
78+
Y_noisy = torch.randn((n, d2)) * 10.0
79+
Y_noisy[:n2, :] = (
80+
torch.cos(torch.arange(n2) * torch.pi / n2)[:, None] +
81+
torch.cos(torch.arange(d2) * torch.pi / d2)[None, :]
82+
)
83+
Y = Y_noisy[:n2, :]
84+
85+
X, Y_noisy, Y = X.double(), Y_noisy.double(), Y.double()
86+
87+
fig, axes = pl.subplots(nrows=1, ncols=3, figsize=(12, 5))
88+
axes[0].imshow(X, vmin=-2, vmax=2)
89+
axes[0].set_title('$X$')
90+
91+
axes[1].imshow(Y, vmin=-2, vmax=2)
92+
axes[1].set_title('Clean $Y$')
93+
94+
axes[2].imshow(Y_noisy, vmin=-2, vmax=2)
95+
axes[2].set_title('Noisy $Y$')
96+
97+
pl.tight_layout()
98+
99+
# %%
100+
# Optimize the COOT distance with respect to the sample marginal distribution
101+
# ---------------------------------------------------------------------------
102+
103+
losses = []
104+
lr = 1e-3
105+
niter = 1000
106+
107+
b = torch.tensor(ot.unif(n), requires_grad=True)
108+
109+
for i in range(niter):
110+
111+
loss = coot2(X, Y_noisy, wy_samp=b, log=False, verbose=False)
112+
losses.append(float(loss))
113+
114+
loss.backward()
115+
116+
with torch.no_grad():
117+
b -= lr * b.grad # gradient step
118+
b[:] = ot.utils.proj_simplex(b) # projection on the simplex
119+
120+
b.grad.zero_()
121+
122+
# Estimated sample marginal distribution and training loss curve
123+
pl.plot(losses[10:])
124+
pl.title('CO-Optimal Transport distance')
125+
126+
print(f"Marginal distribution = {b.detach().numpy()}")
127+
128+
# %%
129+
# Visualizing the row and column alignments with the estimated sample marginal distribution
130+
# -----------------------------------------------------------------------------------------
131+
#
132+
# Clearly, the learned marginal distribution completely and successfully ignores the 5 outliers.
133+
134+
X, Y_noisy = X.numpy(), Y_noisy.numpy()
135+
b = b.detach().numpy()
136+
137+
pi_sample, pi_feature = coot(X, Y_noisy, wy_samp=b, log=False, verbose=True)
138+
139+
fig = pl.figure(4, (9, 7))
140+
pl.clf()
141+
142+
ax1 = pl.subplot(2, 2, 3)
143+
pl.imshow(X, vmin=-2, vmax=2)
144+
pl.xlabel('$X$')
145+
146+
ax2 = pl.subplot(2, 2, 2)
147+
ax2.yaxis.tick_right()
148+
pl.imshow(np.transpose(Y_noisy), vmin=-2, vmax=2)
149+
pl.title("Transpose(Noisy $Y$)")
150+
ax2.xaxis.tick_top()
151+
152+
for i in range(n1):
153+
j = np.argmax(pi_sample[i, :])
154+
xyA = (d1 - .5, i)
155+
xyB = (j, d2 - .5)
156+
con = ConnectionPatch(xyA=xyA, xyB=xyB, coordsA=ax1.transData,
157+
coordsB=ax2.transData, color="black")
158+
fig.add_artist(con)
159+
160+
for i in range(d1):
161+
j = np.argmax(pi_feature[i, :])
162+
xyA = (i, -.5)
163+
xyB = (-.5, j)
164+
con = ConnectionPatch(
165+
xyA=xyA, xyB=xyB, coordsA=ax1.transData, coordsB=ax2.transData, color="blue")
166+
fig.add_artist(con)
167+
168+
# %%
169+
# Now, let's see if we can use unbalanced Co-Optimal Transport to recover the clean OT plans,
170+
# without the need of learning the marginal distribution as in Co-Optimal Transport.
171+
# -----------------------------------------------------------------------------------------
172+
173+
pi_sample, pi_feature = unbalanced_co_optimal_transport(
174+
X=X, Y=Y_noisy, reg_marginals=(10, 10), epsilon=0, divergence="kl",
175+
unbalanced_solver="mm", max_iter=1000, tol=1e-6,
176+
max_iter_ot=1000, tol_ot=1e-6, log=False, verbose=False
177+
)
178+
179+
# %%
180+
# Visualizing the row and column alignments learned by unbalanced Co-Optimal Transport.
181+
# -----------------------------------------------------------------------------------------
182+
#
183+
# Similar to Co-Optimal Transport, we are also able to fully recover the clean OT plans.
184+
185+
fig = pl.figure(4, (9, 7))
186+
pl.clf()
187+
188+
ax1 = pl.subplot(2, 2, 3)
189+
pl.imshow(X, vmin=-2, vmax=2)
190+
pl.xlabel('$X$')
191+
192+
ax2 = pl.subplot(2, 2, 2)
193+
ax2.yaxis.tick_right()
194+
pl.imshow(np.transpose(Y_noisy), vmin=-2, vmax=2)
195+
pl.title("Transpose(Noisy $Y$)")
196+
ax2.xaxis.tick_top()
197+
198+
for i in range(n1):
199+
j = np.argmax(pi_sample[i, :])
200+
xyA = (d1 - .5, i)
201+
xyB = (j, d2 - .5)
202+
con = ConnectionPatch(xyA=xyA, xyB=xyB, coordsA=ax1.transData,
203+
coordsB=ax2.transData, color="black")
204+
fig.add_artist(con)
205+
206+
for i in range(d1):
207+
j = np.argmax(pi_feature[i, :])
208+
xyA = (i, -.5)
209+
xyB = (-.5, j)
210+
con = ConnectionPatch(
211+
xyA=xyA, xyB=xyB, coordsA=ax1.transData, coordsB=ax2.transData, color="blue")
212+
fig.add_artist(con)
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"\n# Detecting outliers by learning sample marginal distribution with CO-Optimal Transport and by using unbalanced Co-Optimal Transport\n\nIn this example, we consider two point clouds living in different Euclidean spaces, where the outliers\nare artificially injected into the target data. We illustrate two methods which allow us to filter out\nthese outliers.\n\nThe first method requires learning the sample marginal distribution which minimizes\nthe CO-Optimal Transport distance [49] between two input spaces.\nMore precisely, given a source data\n$(X, \\mu_x^{(s)}, \\mu_x^{(f)})$ and a target matrix $Y$ associated with a fixed\nhistogram on features $\\mu_y^{(f)}$, we want to solve the following problem\n\n\\begin{align}\\min_{\\mu_y^{(s)} \\in \\Delta} \\text{COOT}\\left( (X, \\mu_x^{(s)}, \\mu_x^{(f)}), (Y, \\mu_y^{(s)}, \\mu_y^{(f)}) \\right)\\end{align}\n\nwhere $\\Delta$ is the probability simplex. This minimization is done with a\nsimple projected gradient descent in PyTorch. We use the automatic backend of POT that\nallows us to compute the CO-Optimal Transport distance with :func:`ot.coot.co_optimal_transport2`\nwith differentiable losses.\n\nThe second method simply requires direct application of unbalanced Co-Optimal Transport [71].\nMore precisely, it is enough to use the sample and feature coupling from solving\n\n\\begin{align}\\text{UCOOT}\\left( (X, \\mu_x^{(s)}, \\mu_x^{(f)}), (Y, \\mu_y^{(s)}, \\mu_y^{(f)}) \\right)\\end{align}\n\nwhere all the marginal distributions are uniform.\n\n.. [49] Redko, I., Vayer, T., Flamary, R., and Courty, N. (2020).\n [CO-Optimal Transport](https://proceedings.neurips.cc/paper/2020/file/cc384c68ad503482fb24e6d1e3b512ae-Paper.pdf).\n Advances in Neural Information Processing Systems, 33.\n.. [71] H. Tran, H. Janati, N. Courty, R. Flamary, I. Redko, P. Demetci & R. Singh (2023). [Unbalanced Co-Optimal Transport](https://dl.acm.org/doi/10.1609/aaai.v37i8.26193).\n AAAI Conference on Artificial Intelligence.\n"
8+
]
9+
},
10+
{
11+
"cell_type": "code",
12+
"execution_count": null,
13+
"metadata": {
14+
"collapsed": false
15+
},
16+
"outputs": [],
17+
"source": [
18+
"# Author: Remi Flamary <[email protected]>\n# Quang Huy Tran <[email protected]>\n# License: MIT License\n\nfrom matplotlib.patches import ConnectionPatch\nimport torch\nimport numpy as np\n\nimport matplotlib.pyplot as pl\nimport ot\n\nfrom ot.coot import co_optimal_transport as coot\nfrom ot.coot import co_optimal_transport2 as coot2\nfrom ot.gromov._unbalanced import unbalanced_co_optimal_transport"
19+
]
20+
},
21+
{
22+
"cell_type": "markdown",
23+
"metadata": {},
24+
"source": [
25+
"## Generate data\nThe source and clean target matrices are generated by\n$X_{i,j} = \\cos(\\frac{i}{n_1} \\pi) + \\cos(\\frac{j}{d_1} \\pi)$ and\n$Y_{i,j} = \\cos(\\frac{i}{n_2} \\pi) + \\cos(\\frac{j}{d_2} \\pi)$.\nThe target matrix is then contaminated by adding 5 row outliers.\nIntuitively, we expect that the estimated sample distribution should ignore these outliers,\ni.e. their weights should be zero.\n\n"
26+
]
27+
},
28+
{
29+
"cell_type": "code",
30+
"execution_count": null,
31+
"metadata": {
32+
"collapsed": false
33+
},
34+
"outputs": [],
35+
"source": [
36+
"np.random.seed(182)\n\nn1, d1 = 20, 16\nn2, d2 = 10, 8\nn = 15\n\nX = (\n torch.cos(torch.arange(n1) * torch.pi / n1)[:, None] +\n torch.cos(torch.arange(d1) * torch.pi / d1)[None, :]\n)\n\n# Generate clean target data mixed with outliers\nY_noisy = torch.randn((n, d2)) * 10.0\nY_noisy[:n2, :] = (\n torch.cos(torch.arange(n2) * torch.pi / n2)[:, None] +\n torch.cos(torch.arange(d2) * torch.pi / d2)[None, :]\n)\nY = Y_noisy[:n2, :]\n\nX, Y_noisy, Y = X.double(), Y_noisy.double(), Y.double()\n\nfig, axes = pl.subplots(nrows=1, ncols=3, figsize=(12, 5))\naxes[0].imshow(X, vmin=-2, vmax=2)\naxes[0].set_title('$X$')\n\naxes[1].imshow(Y, vmin=-2, vmax=2)\naxes[1].set_title('Clean $Y$')\n\naxes[2].imshow(Y_noisy, vmin=-2, vmax=2)\naxes[2].set_title('Noisy $Y$')\n\npl.tight_layout()"
37+
]
38+
},
39+
{
40+
"cell_type": "markdown",
41+
"metadata": {},
42+
"source": [
43+
"## Optimize the COOT distance with respect to the sample marginal distribution\n\n"
44+
]
45+
},
46+
{
47+
"cell_type": "code",
48+
"execution_count": null,
49+
"metadata": {
50+
"collapsed": false
51+
},
52+
"outputs": [],
53+
"source": [
54+
"losses = []\nlr = 1e-3\nniter = 1000\n\nb = torch.tensor(ot.unif(n), requires_grad=True)\n\nfor i in range(niter):\n\n loss = coot2(X, Y_noisy, wy_samp=b, log=False, verbose=False)\n losses.append(float(loss))\n\n loss.backward()\n\n with torch.no_grad():\n b -= lr * b.grad # gradient step\n b[:] = ot.utils.proj_simplex(b) # projection on the simplex\n\n b.grad.zero_()\n\n# Estimated sample marginal distribution and training loss curve\npl.plot(losses[10:])\npl.title('CO-Optimal Transport distance')\n\nprint(f\"Marginal distribution = {b.detach().numpy()}\")"
55+
]
56+
},
57+
{
58+
"cell_type": "markdown",
59+
"metadata": {},
60+
"source": [
61+
"## Visualizing the row and column alignments with the estimated sample marginal distribution\n\nClearly, the learned marginal distribution completely and successfully ignores the 5 outliers.\n\n"
62+
]
63+
},
64+
{
65+
"cell_type": "code",
66+
"execution_count": null,
67+
"metadata": {
68+
"collapsed": false
69+
},
70+
"outputs": [],
71+
"source": [
72+
"X, Y_noisy = X.numpy(), Y_noisy.numpy()\nb = b.detach().numpy()\n\npi_sample, pi_feature = coot(X, Y_noisy, wy_samp=b, log=False, verbose=True)\n\nfig = pl.figure(4, (9, 7))\npl.clf()\n\nax1 = pl.subplot(2, 2, 3)\npl.imshow(X, vmin=-2, vmax=2)\npl.xlabel('$X$')\n\nax2 = pl.subplot(2, 2, 2)\nax2.yaxis.tick_right()\npl.imshow(np.transpose(Y_noisy), vmin=-2, vmax=2)\npl.title(\"Transpose(Noisy $Y$)\")\nax2.xaxis.tick_top()\n\nfor i in range(n1):\n j = np.argmax(pi_sample[i, :])\n xyA = (d1 - .5, i)\n xyB = (j, d2 - .5)\n con = ConnectionPatch(xyA=xyA, xyB=xyB, coordsA=ax1.transData,\n coordsB=ax2.transData, color=\"black\")\n fig.add_artist(con)\n\nfor i in range(d1):\n j = np.argmax(pi_feature[i, :])\n xyA = (i, -.5)\n xyB = (-.5, j)\n con = ConnectionPatch(\n xyA=xyA, xyB=xyB, coordsA=ax1.transData, coordsB=ax2.transData, color=\"blue\")\n fig.add_artist(con)"
73+
]
74+
},
75+
{
76+
"cell_type": "markdown",
77+
"metadata": {},
78+
"source": [
79+
"Now, let's see if we can use unbalanced Co-Optimal Transport to recover the clean OT plans,\nwithout the need of learning the marginal distribution as in Co-Optimal Transport.\n-----------------------------------------------------------------------------------------\n\n"
80+
]
81+
},
82+
{
83+
"cell_type": "code",
84+
"execution_count": null,
85+
"metadata": {
86+
"collapsed": false
87+
},
88+
"outputs": [],
89+
"source": [
90+
"pi_sample, pi_feature = unbalanced_co_optimal_transport(\n X=X, Y=Y_noisy, reg_marginals=(10, 10), epsilon=0, divergence=\"kl\",\n unbalanced_solver=\"mm\", max_iter=1000, tol=1e-6,\n max_iter_ot=1000, tol_ot=1e-6, log=False, verbose=False\n)"
91+
]
92+
},
93+
{
94+
"cell_type": "markdown",
95+
"metadata": {},
96+
"source": [
97+
"## Visualizing the row and column alignments learned by unbalanced Co-Optimal Transport.\n\nSimilar to Co-Optimal Transport, we are also able to fully recover the clean OT plans.\n\n"
98+
]
99+
},
100+
{
101+
"cell_type": "code",
102+
"execution_count": null,
103+
"metadata": {
104+
"collapsed": false
105+
},
106+
"outputs": [],
107+
"source": [
108+
"fig = pl.figure(4, (9, 7))\npl.clf()\n\nax1 = pl.subplot(2, 2, 3)\npl.imshow(X, vmin=-2, vmax=2)\npl.xlabel('$X$')\n\nax2 = pl.subplot(2, 2, 2)\nax2.yaxis.tick_right()\npl.imshow(np.transpose(Y_noisy), vmin=-2, vmax=2)\npl.title(\"Transpose(Noisy $Y$)\")\nax2.xaxis.tick_top()\n\nfor i in range(n1):\n j = np.argmax(pi_sample[i, :])\n xyA = (d1 - .5, i)\n xyB = (j, d2 - .5)\n con = ConnectionPatch(xyA=xyA, xyB=xyB, coordsA=ax1.transData,\n coordsB=ax2.transData, color=\"black\")\n fig.add_artist(con)\n\nfor i in range(d1):\n j = np.argmax(pi_feature[i, :])\n xyA = (i, -.5)\n xyB = (-.5, j)\n con = ConnectionPatch(\n xyA=xyA, xyB=xyB, coordsA=ax1.transData, coordsB=ax2.transData, color=\"blue\")\n fig.add_artist(con)"
109+
]
110+
}
111+
],
112+
"metadata": {
113+
"kernelspec": {
114+
"display_name": "Python 3",
115+
"language": "python",
116+
"name": "python3"
117+
},
118+
"language_info": {
119+
"codemirror_mode": {
120+
"name": "ipython",
121+
"version": 3
122+
},
123+
"file_extension": ".py",
124+
"mimetype": "text/x-python",
125+
"name": "python",
126+
"nbconvert_exporter": "python",
127+
"pygments_lexer": "ipython3",
128+
"version": "3.10.15"
129+
}
130+
},
131+
"nbformat": 4,
132+
"nbformat_minor": 0
133+
}
-608 Bytes
329 Bytes
-99 Bytes
-97 Bytes
-316 Bytes
-345 Bytes
-241 Bytes
486 Bytes
-23 Bytes
629 Bytes
-94 Bytes
-209 Bytes
-707 Bytes
-103 Bytes

0 commit comments

Comments
 (0)