Skip to content

Commit

Permalink
SAE post
Browse files Browse the repository at this point in the history
  • Loading branch information
tbenthompson committed Oct 15, 2024
1 parent deaea04 commit b6e6801
Show file tree
Hide file tree
Showing 2 changed files with 3,673 additions and 0 deletions.
27 changes: 27 additions & 0 deletions posts/biblio.bib
Original file line number Diff line number Diff line change
Expand Up @@ -280,4 +280,31 @@ @misc{cui2024orbenchoverrefusalbenchmarklarge
archivePrefix={arXiv},
primaryClass={cs.CL},
url={https://arxiv.org/abs/2405.20947},
}

% Web-only source (forum post). The link is stored as a raw URL in the `url`
% field, like the other entries in this file; `howpublished` names the medium.
@misc{bloom2024gpt2residualsaes,
  author       = {Bloom, Joseph},
  title        = {Open Source Sparse Autoencoders for all Residual Stream Layers of {GPT2} Small},
  year         = {2024},
  howpublished = {AI Alignment Forum},
  url          = {https://www.alignmentforum.org/posts/f9EgfLSurAiqRJySD/open-source-sparse-autoencoders-for-all-residual-stream},
}

% arXiv preprint. Proper nouns in the title are brace-protected so that
% sentence-casing styles keep their capitals; accented letters use the
% {\'x} "special character" form so they sort and render correctly under
% classic BibTeX as well as Biber.
@misc{lieberum2024gemmascopeopensparse,
  author        = {Lieberum, Tom and Rajamanoharan, Senthooran and Conmy, Arthur and Smith, Lewis and Sonnerat, Nicolas and Varma, Vikrant and Kram{\'a}r, J{\'a}nos and Dragan, Anca and Shah, Rohin and Nanda, Neel},
  title         = {{Gemma Scope}: Open Sparse Autoencoders Everywhere All At Once on {Gemma 2}},
  year          = {2024},
  eprint        = {2408.05147},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  url           = {https://arxiv.org/abs/2408.05147},
}

% arXiv preprint. The acronym in the title is brace-protected against
% sentence-casing styles; authors use the unambiguous "Last, First" form.
@misc{thompson2024flrtfluentstudentteacherredteaming,
  author        = {Thompson, T. Ben and Sklar, Michael},
  title         = {{FLRT}: Fluent Student-Teacher Redteaming},
  year          = {2024},
  eprint        = {2407.17447},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2407.17447},
}
Loading

0 comments on commit b6e6801

Please sign in to comment.