Commit 5e06c2a

add attention and deep set references

1 parent: dac6550
2 files changed (+22 -3)

bibliography.bib (+20 -1)
@@ -354,4 +354,23 @@ @article{pooladian2023multisample
 author={Pooladian, Aram-Alexandre and Ben-Hamu, Heli and Domingo-Enrich, Carles and Amos, Brandon and Lipman, Yaron and Chen, Ricky TQ},
 journal={arXiv preprint arXiv:2304.14772},
 year={2023}
-}
+}
+
+@article{vaswani2017attention,
+title={Attention is all you need},
+author={Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N and Kaiser, {\L}ukasz and Polosukhin, Illia},
+journal={Advances in Neural Information Processing Systems},
+volume={30},
+year={2017}
+}
+
+
+@inproceedings{zaheer_deep_2017,
+title={Deep {Sets}},
+volume={30},
+abstract={We study the problem of designing models for machine learning tasks defined on sets. In contrast to the traditional approach of operating on fixed dimensional vectors, we consider objective functions defined on sets that are invariant to permutations. Such problems are widespread, ranging from the estimation of population statistics, to anomaly detection in piezometer data of embankment dams, to cosmology. Our main theorem characterizes the permutation invariant objective functions and provides a family of functions to which any permutation invariant objective function must belong. This family of functions has a special structure which enables us to design a deep network architecture that can operate on sets and which can be deployed on a variety of scenarios including both unsupervised and supervised learning tasks. We demonstrate the applicability of our method on population statistic estimation, point cloud classification, set expansion, and outlier detection.},
+booktitle={Advances in Neural Information Processing Systems},
+author={Zaheer, Manzil and Kottur, Satwik and Ravanbakhsh, Siamak and Poczos, Barnabas and Salakhutdinov, Russ R and Smola, Alexander J},
+year={2017},
+file={Full Text PDF:/Users/simonkucharsky/Zotero/storage/LPWEVIE9/Zaheer et al. - 2017 - Deep Sets.pdf:application/pdf},
+}

slides/deep-learning.qmd (+2 -2)
@@ -756,7 +756,7 @@ $\rightarrow$ leverage properties of data to our advantage by building networks
 
 ![[Source: Christopher Olah's blog](https://colah.github.io/posts/2015-08-Understanding-LSTMs/)](https://colah.github.io/posts/2015-08-Understanding-LSTMs/img/LSTM3-chain.png){fig-align="center"}
 
-## Attention mechanism
+## Attention [@vaswani2017attention]
 
 - Sequential updating is slow
 - Limited memory (even for LSTM)
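
The newly cited mechanism is scaled dot-product attention [@vaswani2017attention]: $\mathrm{Attention}(Q, K, V) = \mathrm{softmax}\!\left(QK^\top / \sqrt{d_k}\right) V$. Every query position attends to every key position in a single matrix product, so there is no sequential recurrence to unroll. A minimal NumPy sketch; the shapes and variable names below are illustrative assumptions, not taken from the slides:

```python
# Scaled dot-product attention, minimal sketch:
# Attention(Q, K, V) = softmax(Q K^T / sqrt(d_k)) V.
import numpy as np

def softmax(x, axis=-1):
    x = x - x.max(axis=axis, keepdims=True)  # shift for numerical stability
    e = np.exp(x)
    return e / e.sum(axis=axis, keepdims=True)

def attention(Q, K, V):
    d_k = Q.shape[-1]
    scores = Q @ K.swapaxes(-2, -1) / np.sqrt(d_k)  # pairwise query-key similarity
    weights = softmax(scores, axis=-1)              # each query's weights sum to 1
    return weights @ V                              # weighted average of the values

# Toy example (illustrative shapes): 5 queries attend over 7 key/value pairs, d_k = 4.
rng = np.random.default_rng(0)
Q, K, V = rng.normal(size=(5, 4)), rng.normal(size=(7, 4)), rng.normal(size=(7, 4))
out = attention(Q, K, V)  # shape (5, 4): one updated representation per query
```

Because all queries are processed in one batched matrix product, attention avoids the slow sequential updating and limited memory that the slide attributes to recurrent models.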
@@ -813,7 +813,7 @@ $$
 - Permutation invariant
 - Interactions between elements
 
-## Deep Set
+## Deep Set [@zaheer_deep_2017]
 
 ::::{.columns}
 
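The newly cited Deep Sets result [@zaheer_deep_2017] characterizes permutation-invariant functions on sets as $f(X) = \rho\left(\sum_i \phi(x_i)\right)$: embed each element with $\phi$, pool with a sum, then transform with $\rho$. A toy NumPy sketch; the random affine maps below are hypothetical stand-ins for learned networks:

```python
# Deep Sets construction, minimal sketch:
# f(X) = rho(sum_i phi(x_i)); sum pooling makes f permutation invariant.
import numpy as np

rng = np.random.default_rng(0)
W_phi = rng.normal(size=(3, 8))  # illustrative weights for the element-wise map
W_rho = rng.normal(size=(8, 2))  # illustrative weights for the post-pooling map

def phi(X):
    return np.tanh(X @ W_phi)    # embed each set element independently

def rho(z):
    return np.tanh(z @ W_rho)    # transform the pooled representation

def deep_set(X):                 # X: (n_elements, n_features); row order is irrelevant
    return rho(phi(X).sum(axis=0))

X = rng.normal(size=(5, 3))      # a "set" of 5 elements with 3 features each
perm = rng.permutation(5)
assert np.allclose(deep_set(X), deep_set(X[perm]))  # shuffling the set changes nothing
```

The sum over elements is what yields the permutation invariance listed on the slide; mean or max pooling preserves the property, while attention over set elements additionally models the interactions between them.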