- file:./tex/projectproposal.tex
- file:./tex/projectbibliography.bib
- {1409.0473} Neural Machine Translation by Jointly Learning to Align and Translate
- file:./papers/jointlyLearningToAlignAndTranslate.pdf
@misc{bahdanau2016neural,
title={Neural Machine Translation by Jointly Learning to Align and Translate},
author={Dzmitry Bahdanau and Kyunghyun Cho and Yoshua Bengio},
year={2016},
eprint={1409.0473},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
- The Illustrated Transformer – Jay Alammar (blog post)
- {1706.03762} Attention Is All You Need
- file:./papers/attentionIsAllYouNeed.pdf
@article{allyouneed,
author = {Ashish Vaswani and
Noam Shazeer and
Niki Parmar and
Jakob Uszkoreit and
Llion Jones and
Aidan N. Gomez and
Lukasz Kaiser and
Illia Polosukhin},
title = {Attention Is All You Need},
journal = {CoRR},
volume = {abs/1706.03762},
year = {2017},
url = {http://arxiv.org/abs/1706.03762},
archivePrefix = {arXiv},
eprint = {1706.03762},
timestamp = {Sat, 23 Jan 2021 01:20:40 +0100},
biburl = {https://dblp.org/rec/journals/corr/VaswaniSPUJGKP17.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
- {1810.04805} BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding
- file:./papers/1810.04805.pdf
@article{bert,
author = {Jacob Devlin and
Ming{-}Wei Chang and
Kenton Lee and
Kristina Toutanova},
title = {{BERT:} Pre-training of Deep Bidirectional Transformers for Language
Understanding},
journal = {CoRR},
volume = {abs/1810.04805},
year = {2018},
url = {http://arxiv.org/abs/1810.04805},
archivePrefix = {arXiv},
eprint = {1810.04805},
timestamp = {Tue, 30 Oct 2018 20:39:56 +0100},
biburl = {https://dblp.org/rec/journals/corr/abs-1810-04805.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
- {2003.08271} Pre-trained Models for Natural Language Processing: A Survey
- file:./papers/2003.08271.pdf
@misc{qiu2020pretrained,
title={Pre-trained Models for Natural Language Processing: A Survey},
author={Xipeng Qiu and Tianxiang Sun and Yige Xu and Yunfan Shao and Ning Dai and Xuanjing Huang},
year={2020},
eprint={2003.08271},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
- {1503.02531} Distilling the Knowledge in a Neural Network
- file:./papers/1503.02531.pdf
@misc{hinton2015distilling,
title={Distilling the Knowledge in a Neural Network},
author={Geoffrey Hinton and Oriol Vinyals and Jeff Dean},
year={2015},
eprint={1503.02531},
archivePrefix={arXiv},
primaryClass={stat.ML}
}
- {1909.10351} TinyBERT: Distilling BERT for Natural Language Understanding
- file:./papers/1909.10351.pdf
@article{tinybert,
author = {Xiaoqi Jiao and
Yichun Yin and
Lifeng Shang and
Xin Jiang and
Xiao Chen and
Linlin Li and
Fang Wang and
Qun Liu},
title = {TinyBERT: Distilling {BERT} for Natural Language Understanding},
journal = {CoRR},
volume = {abs/1909.10351},
year = {2019},
url = {http://arxiv.org/abs/1909.10351},
archivePrefix = {arXiv},
eprint = {1909.10351},
timestamp = {Fri, 27 Sep 2019 13:04:21 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-1909-10351.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
- {2008.05221} Compression of Deep Learning Models for Text: A Survey
- file:./papers/2008.05221.pdf
@misc{gupta2020compression,
title={Compression of Deep Learning Models for Text: A Survey},
author={Manish Gupta and Puneet Agrawal},
year={2020},
eprint={2008.05221},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
- {1711.02782} Block-Sparse Recurrent Neural Networks
@article{blocksparse,
author = {Sharan Narang and
Eric Undersander and
Gregory F. Diamos},
title = {Block-Sparse Recurrent Neural Networks},
journal = {CoRR},
volume = {abs/1711.02782},
year = {2017},
url = {http://arxiv.org/abs/1711.02782},
archivePrefix = {arXiv},
eprint = {1711.02782},
timestamp = {Mon, 13 Aug 2018 16:48:36 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-1711-02782.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
- {1906.04341} What Does BERT Look At? An Analysis of BERT’s Attention
@article{whatdoesbertlookat,
author = {Kevin Clark and
Urvashi Khandelwal and
Omer Levy and
Christopher D. Manning},
title = {What Does {BERT} Look At? An Analysis of BERT's Attention},
journal = {CoRR},
volume = {abs/1906.04341},
year = {2019},
url = {http://arxiv.org/abs/1906.04341},
archivePrefix = {arXiv},
eprint = {1906.04341},
timestamp = {Fri, 14 Jun 2019 09:38:24 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-1906-04341.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
- GLUE Benchmark
- file:./papers/glue.pdf
@misc{glue,
title={GLUE: A Multi-Task Benchmark and Analysis Platform for Natural Language Understanding},
author={Alex Wang and Amanpreet Singh and Julian Michael and Felix Hill and Omer Levy and Samuel R. Bowman},
year={2018},
eprint={1804.07461},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
- {1806.03822} Know What You Don’t Know: Unanswerable Questions for SQuAD (the Stanford Question Answering Dataset)
- file:./papers/1806.03822.pdf
@article{squad,
author = {Pranav Rajpurkar and
Robin Jia and
Percy Liang},
title = {Know What You Don't Know: Unanswerable Questions for SQuAD},
journal = {CoRR},
volume = {abs/1806.03822},
year = {2018},
url = {http://arxiv.org/abs/1806.03822},
archivePrefix = {arXiv},
eprint = {1806.03822},
timestamp = {Mon, 13 Aug 2018 16:48:21 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-1806-03822.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
- {1808.05326} SWAG: A Large-Scale Adversarial Dataset for Grounded Commonsense Inference
- file:./papers/1808.05326.pdf
@article{swag,
author = {Rowan Zellers and
Yonatan Bisk and
Roy Schwartz and
Yejin Choi},
title = {{SWAG:} {A} Large-Scale Adversarial Dataset for Grounded Commonsense
Inference},
journal = {CoRR},
volume = {abs/1808.05326},
year = {2018},
url = {http://arxiv.org/abs/1808.05326},
archivePrefix = {arXiv},
eprint = {1808.05326},
timestamp = {Wed, 23 Dec 2020 10:37:10 +0100},
biburl = {https://dblp.org/rec/journals/corr/abs-1808-05326.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
- {1905.05460} Cognitive Graph for Multi-Hop Reading Comprehension at Scale
- file:./papers/1905.05460.pdf
@article{multihop,
author = {Ming Ding and
Chang Zhou and
Qibin Chen and
Hongxia Yang and
Jie Tang},
title = {Cognitive Graph for Multi-Hop Reading Comprehension at Scale},
journal = {CoRR},
volume = {abs/1905.05460},
year = {2019},
url = {http://arxiv.org/abs/1905.05460},
archivePrefix = {arXiv},
eprint = {1905.05460},
timestamp = {Tue, 28 May 2019 12:48:08 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-1905-05460.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}