DESCRIPTION

This repository contains the datasets for the automatic keyphrase extraction task.

FILES

* 500N-KPCrowd.zip   data from Marujo:LREC2012 (News articles annotated using AMT)
* 110-PT-BN-KP.zip   data from Marujo:Interspeech2011 (non-English AKE corpus)
* MAUI.tar.gz        data from University of Waikato (KEA, MAUI systems)
* Wan2008.tar.gz     data from Wan:2008
* Schutz2008.tar.gz  data from Schutz:2008 (only answer sets and readme are provided; the papers are available at ftp://ftp.ncbi.nlm.nih.gov/pub/pmc/articles.tar.gz)
* Nguyen2007.zip     data from Nguyen:2007
* Hulth2003.tar.gz   data from Hulth:2003

BIBLIOGRAPHY

The citation keys used under FILES (e.g. Marujo:LREC2012) refer to the BibTeX entries below.

@inproceedings{Marujo:LREC2012,
  author    = {Marujo, Luis and Gershman, Anatole and Carbonell, Jaime and Frederking, Robert and Neto, Jo{\~a}o P.},
  title     = {Supervised Topical Key Phrase Extraction of News Stories using Crowdsourcing, Light Filtering and Co-reference Normalization},
  booktitle = {Proceedings of LREC 2012},
  publisher = {ELRA},
  location  = {Istanbul, Turkey},
  year      = {2012}
}

@inproceedings{Marujo:Interspeech2011,
  author    = {Marujo, Luis and Viveiros, M{\'a}rcio and Neto, Jo{\~a}o P.},
  title     = {Keyphrase Cloud Generation of Broadcast News},
  booktitle = {Proceedings of Interspeech 2011},
  publisher = {ISCA},
  location  = {Florence, Italy},
  year      = {2011},
  month     = sep
}

@inproceedings{Wan:2008,
  author    = {Wan, Xiaojun and Xiao, Jianguo},
  title     = {{CollabRank}: Towards a Collaborative Approach to Single-Document Keyphrase Extraction},
  booktitle = {Proceedings of 22nd International Conference on Computational Linguistics},
  address   = {Manchester, UK},
  pages     = {969--976},
  year      = {2008}
}

@mastersthesis{Schutz:2008,
  author = {Schutz, Alexander Thorsten},
  title  = {Keyphrase Extraction from Single Documents in the Open Domain Exploiting Linguistic and Statistical Methods},
  school = {National University of Ireland},
  year   = {2008}
}

@inproceedings{Nguyen:2007,
  author    = {Nguyen, Thuy Dung and Kan, Min-Yen},
  title     = {Keyphrase Extraction in Scientific Publications},
  booktitle = {Proceedings of the International Conference on Asian Digital Libraries},
  pages     = {317--326},
  year      = {2007}
}

@inproceedings{Medelyan:2006,
  author    = {Medelyan, Olena and Witten, Ian H.},
  title     = {Thesaurus based automatic keyphrase indexing},
  booktitle = {Proceedings of the 6th ACM/IEEE-CS joint conference on Digital libraries},
  pages     = {296--297},
  year      = {2006}
}

@inproceedings{Hulth:2003,
  author    = {Hulth, Anette},
  title     = {Improved automatic keyword extraction given more linguistic knowledge},
  booktitle = {Proceedings of the 2003 conference on Empirical methods in natural language processing},
  pages     = {216--223},
  year      = {2003}
}

@inproceedings{Frank:1999,
  author    = {Frank, Eibe and Paynter, Gordon W. and Witten, Ian H. and Gutwin, Carl and Nevill-Manning, Craig G.},
  title     = {Domain-Specific Keyphrase Extraction},
  booktitle = {Proceedings of the 16th International Joint Conference on Artificial Intelligence},
  pages     = {668--673},
  year      = {1999}
}

@inproceedings{Witten:1999,
  author    = {Witten, Ian H. and Paynter, Gordon W. and Frank, Eibe and Gutwin, Carl and Nevill-Manning, Craig G.},
  title     = {{KEA}: Practical Automatic Keyphrase Extraction},
  booktitle = {Proceedings of the fourth ACM conference on Digital libraries},
  pages     = {254--256},
  year      = {1999}
}

CONTACT

If you have a dataset for the automatic keyphrase extraction task and want to share it with others, please contact me for commit rights.