/publications

Qurator-SPK team publications

Publications

Automatisierte semantische Anreicherung von historischen Texten
In: b.i.t.online
Vol. 27(3) 2024
📕 b.i.t.online, 27(3), pp. 232–241. 2024.
💾 github.com/qurator-spk/sbb_ner

@article{DBLP:journal/bit/LabuschN24,
author    = {Kai Labusch and Sophie Schneider and Clemens Neudecker},
title     = {Automatisierte semantische {Anreicherung} von historischen {Texten}},
journal   = {b.i.t.online},
year      = {2024},
volume    = {27},
number    = {3},
pages     = {232--241},
url       = {https://www.b-i-t-online.de/heft/2024-03-fachbeitrag-labusch.pdf}
}


Datasheets for Digital Cultural Heritage Datasets
In: Journal of Open Humanities Data
Vol. 9(1) 2023
📕 Journal of Open Humanities Data, 9(1), pp. 1–11. 2023.
💾 zenodo.org/datasheet

@article{DBLP:journal/johd/AlkemadeN24,
  author  = {Henk Alkemade
             and Steven Claeyssens
             and Giovanni Colavizza
             and Nuno Freire
             and Jörg Lehmann
             and Clemens Neudecker
             and Giulia Osti
             and Daniel van Strien},
  title   = {Datasheets for Digital Cultural Heritage Datasets},
  journal = {Journal of Open Humanities Data},
  volume  = {9},
  number  = {1},
  pages   = {1--11},
  year    = {2023},
  url     = {https://doi.org/10.5334/johd.124},
}


Gauging the Limitations of Natural Language Supervised Text-Image Metrics Learning by Iconclass Visual Concepts
17th International Conference on Document Analysis and Recognition (ICDAR2023)
7th International Workshop on Historical Document Imaging and Processing (HIP’23)
21-26 August 2023, San José, CA, USA
📕 Proceedings of HIP'23, pp. 19–24. 2023.
💾 github.com/qurator-spk/sbb_images

@inproceedings{DBLP:conf/hip/LabuschN23,
author    = {Kai Labusch and Clemens Neudecker},
editor    = {Apostolos Antonacopoulos and Christian Clausner and Maud Ehrmann and Kai Labusch and Clemens Neudecker},
title     = {Gauging the Limitations of Natural Language Supervised Text-Image Metrics Learning by {Iconclass} Visual Concepts},
booktitle = {Proceedings of the 7th International Workshop on Historical Document Imaging and Processing {HIP} 2023,
             San José, CA, USA, August 26, 2023},
pages     = {19--24},
year      = {2023},
url       = {https://doi.org/10.1145/3604951.3605516}
}


Document Layout Analysis with Deep Learning and Heuristics
17th International Conference on Document Analysis and Recognition (ICDAR2023)
7th International Workshop on Historical Document Imaging and Processing (HIP’23)
21-26 August 2023, San José, CA, USA
📕 Proceedings of HIP'23, pp. 73–78. 2023.
💾 github.com/qurator-spk/eynollah

@inproceedings{DBLP:conf/hip/RezanezhadN23b,
author    = {Vahid Rezanezhad and Konstantin Baierer and Mike Gerber and Kai Labusch and Clemens Neudecker},
editor    = {Apostolos Antonacopoulos and Christian Clausner and Maud Ehrmann and Kai Labusch and Clemens Neudecker},
title     = {Document Layout Analysis with Deep Learning and Heuristics},
booktitle = {Proceedings of the 7th International Workshop on Historical Document Imaging and Processing {HIP} 2023,
             San José, CA, USA, August 26, 2023},
pages     = {73--78},
year      = {2023},
url       = {https://doi.org/10.1145/3604951.3605513}
}


A hybrid CNN-Transformer Model for Historical Document Image Binarization
17th International Conference on Document Analysis and Recognition (ICDAR2023)
7th International Workshop on Historical Document Imaging and Processing (HIP’23)
21-26 August 2023, San José, CA, USA
📕 Proceedings of HIP'23, pp. 79–84. 2023.
💾 github.com/qurator-spk/sbb_binarization

@inproceedings{DBLP:conf/hip/RezanezhadN23a,
author    = {Vahid Rezanezhad and Konstantin Baierer and Clemens Neudecker},
editor    = {Apostolos Antonacopoulos and Christian Clausner and Maud Ehrmann and Kai Labusch and Clemens Neudecker},
title     = {A hybrid {CNN}-Transformer Model for Historical Document Image Binarization},
booktitle = {Proceedings of the 7th International Workshop on Historical Document Imaging and Processing {HIP} 2023,
             San José, CA, USA, August 26, 2023},
pages     = {79--84},
year      = {2023},
url       = {https://doi.org/10.1145/3604951.3605508}
}


Cultural Heritage as Data: Digital Curation and Artificial Intelligence in Libraries
3rd Conference on Digital Curation Technologies (Qurator2022)
19-23 September 2022, Berlin, Germany
📕 CEUR-WS vol. 3234, 2022.

@inproceedings{DBLP:conf/qurator/Neudecker22,
author    = {Clemens Neudecker},
editor    = {Adrian Paschke and Georg Rehm and Clemens Neudecker and Lydia Pintscher},
title     = {Cultural Heritage as Data: Digital Curation and Artificial Intelligence in Libraries},
booktitle = {Proceedings of the 3rd Conference on Digital Curation Technologies {Qurator} 2022,
             Berlin, Germany, September 19-23, 2022},
year      = {2022},
url       = {http://ceur-ws.org/Vol-3234/paper2.pdf}
}


Entity Linking in Multilingual Newspapers and Classical Commentaries with BERT
13th Conference and Labs of the Evaluation Forum (CLEF 2022)
Identifying Historical People, Places and other Entities (HIPE2022)
5-8 September 2022, Bologna, Italy
📕 CEUR-WS vol. 3180, pp. 1079–1089. 2022.
💾 github.com/qurator-spk/sbb_ned

@inproceedings{DBLP:conf/clef/LabuschN22,
author    = {Kai Labusch and Clemens Neudecker},
editor    = {Guglielmo Faggioli and Nicola Ferro and Allan Hanbury and Martin Potthast},
title     = {Entity Linking in Multilingual Newspapers and Classical Commentaries with {BERT}},
booktitle = {Working Notes of {CLEF} 2022 - Conference and Labs of the Evaluation Forum, Bologna,
             Italy, September 5-8, 2022},
pages     = {1079--1089},
year      = {2022},
url       = {http://ceur-ws.org/Vol-3180/paper-85.pdf}
}


A survey of OCR evaluation tools and metrics
16th International Conference on Document Analysis and Recognition (ICDAR2021)
6th International Workshop on Historical Document Imaging and Processing (HIP’21)
5-10 September 2021, Lausanne, Switzerland
📕 Proceedings of HIP'21, pp. 13–18. 2021.

@inproceedings{DBLP:conf/hip/Neudecker21,
author    = {Clemens Neudecker and Konstantin Baierer and Mike Gerber and Christian Clausner and Apostolos Antonacopoulos
             and Stefan Pletschacher},
editor    = {Apostolos Antonacopoulos and Christian Clausner and Maud Ehrmann and Clemens Neudecker},
title     = {A survey of {OCR} evaluation tools and metrics},
booktitle = {Proceedings of the 6th International Workshop on Historical Document Imaging and Processing {HIP} 2021,
             Lausanne, Switzerland, September 6, 2021},
pages     = {13--18},
year      = {2021},
url       = {https://doi.org/10.1145/3476887.3476888}
}


Named Entity Linking mit Wikidata und GND – Das Potenzial handkuratierter und strukturierter Datenquellen für die semantische Anreicherung von Volltexten
In: Qualität in der Inhaltserschließung
De Gruyter Saur 2021

📕 Qualität in der Inhaltserschließung, pp. 229–257. 2021.

@incollection{BIPRA-B/70/Menzel21,
author    = {Sina Menzel and Hannes Schnaitter and Josefine Zinck and Vivien Petras and Clemens Neudecker and
             Kai Labusch and Elena Leitner and Georg Rehm},
editor    = {Michael Franke-Maier and Anna Kasprzik and Andreas Ledl and Hans Schürmann},
title     = {{Named Entity Linking} mit {Wikidata} und {GND} – {Das} {Potenzial} handkuratierter und strukturierter {Datenquellen} für
             die semantische {Anreicherung} von {Volltexten}},
booktitle = {Qualität in der Inhaltserschließung},
publisher = {De Gruyter Saur},
pages     = {229--257},
year      = {2021},
url       = {https://doi.org/10.1515/9783110691597-012}
}


Methoden und Metriken zur Messung von OCR-Qualität für die Kuratierung von Daten und Metadaten
In: Qualität in der Inhaltserschließung
De Gruyter Saur 2021

📕 Qualität in der Inhaltserschließung, pp. 137–165. 2021.

@incollection{BIPRA-B/70/Neudecker21,
author    = {Clemens Neudecker and Karolina Zaczynska and Konstantin Baierer and Georg Rehm and Mike Gerber and
             Julian Moreno Schneider},
editor    = {Michael Franke-Maier and Anna Kasprzik and Andreas Ledl and Hans Schürmann},
title     = {Methoden und {Metriken} zur {Messung} von {OCR-Qualität} für die {Kuratierung} von {Daten} und {Metadaten}},
booktitle = {Qualität in der Inhaltserschließung},
publisher = {De Gruyter Saur},
pages     = {137--165},
year      = {2021},
url       = {https://doi.org/10.1515/9783110691597-009}
}


A Two-Step Approach for Automatic OCR Post-Correction
28th International Conference on Computational Linguistics (COLING'2020)
4th Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature (LaTeCH2020)
12-13 December 2020, Barcelona, Spain
📕 Proceedings of LaTeCH2020, pp. 52–57. 2020.
💾 github.com/qurator-spk/sbb_ocr_postcorrection

@inproceedings{DBLP:conf/latech/SchaeferN20,
author    = {Robin Schaefer and Clemens Neudecker},
title     = {A Two-Step Approach for Automatic {OCR} Post-Correction},
booktitle = {Proceedings of the 4th Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage,
             Social Sciences, Humanities and Literature {LaTeCH} 2020},
pages     = {52--57},
year      = {2020},
url       = {https://www.aclweb.org/anthology/2020.latechclfl-1.6.pdf}
}


Named Entity Disambiguation and Linking on Historic Newspaper OCR with BERT
11th Conference and Labs of the Evaluation Forum (CLEF2020)
Identifying Historical People, Places and other Entities (HIPE2020)
22-25 September 2020, Thessaloniki, Greece
📕 CEUR-WS vol. 2696, pp. 1–14. 2020.
💾 github.com/qurator-spk/sbb_ned

@inproceedings{DBLP:conf/clef/LabuschN20,
author    = {Kai Labusch and Clemens Neudecker},
editor    = {Linda Cappellato and Carsten Eickhoff and Nicola Ferro and Aur{\'{e}}lie N{\'{e}}v{\'{e}}ol},
title     = {Named Entity Disambiguation and Linking on Historic Newspaper {OCR} with {BERT}},
booktitle = {Working Notes of {CLEF} 2020 - Conference and Labs of the Evaluation Forum, Thessaloniki,
             Greece, September 22-25, 2020},
pages     = {1--14},
year      = {2020},
url       = {http://ceur-ws.org/Vol-2696/paper_163.pdf}
}


QURATOR: Innovative Technologies for Content and Data Curation
1st Conference on Digital Curation Technologies (QURATOR2020)
20-21 January 2020, Berlin, Germany
📕 CEUR-WS vol. 2535, pp. 1–15. 2020.
💾 qurator.ai

@inproceedings{DBLP:conf/qurator/RehmBHKSOZBGRRR20,
author    = {Georg Rehm and Peter Bourgonje and Stefanie Hegele and Florian Kintzel and Juli{\'{a}}n Moreno
             Schneider and Malte Ostendorff and Karolina Zaczynska and Armin Berger and Stefan Grill and
             S{\"{o}}ren R{\"{a}}uchle and Jens Rauenbusch and Lisa Rutenburg and Andr{\'{e}} Schmidt and Mikka
             Wild and Henry Hoffmann and Julian Fink and Sarah Schulz and Jurica Seva and Joachim Quantz and
             Joachim B{\"{o}}ttger and Josefine Matthey and Rolf Fricke and Jan Thomsen and Adrian Paschke and
             Jamal Al Qundus and Thomas Hoppe and Naouel Karam and Frauke Weichhardt and Christian Fillies and
             Clemens Neudecker and Mike Gerber and Kai Labusch and Vahid Rezanezhad and Robin Schaefer and David
             Zellh{\"{o}}fer and Daniel Siewert and Patrick Bunk and Lydia Pintscher and Elena Aleynikova and
             Franziska Heine},
editor    = {Adrian Paschke and Clemens Neudecker and Georg Rehm and Jamal Al Qundus and Lydia Pintscher},
title     = {{QURATOR:} Innovative Technologies for Content and Data Curation},
booktitle = {Proceedings of the 1st Conference on Digital Curation Technologies, {QURATOR} 2020, Berlin, Germany,
             January 20-21, 2020},
pages     = {1--15},
year      = {2020},
url       = {http://ceur-ws.org/Vol-2535/paper_17.pdf}
}


BERT for Named Entity Recognition in Contemporary and Historic German
15th Conference on Natural Language Processing (KONVENS2019)
9-11 October 2019, Erlangen, Germany
📕 Proceedings of KONVENS2019, pp. 1–9. 2019.
💾 github.com/qurator-spk/sbb_ner

@inproceedings{DBLP:conf/konvens/LabuschNZ19,
author    = {Kai Labusch and Clemens Neudecker and David Zellh{\"{o}}fer},
title     = {{BERT} for Named Entity Recognition in Contemporary and Historic {German}},
booktitle = {Proceedings of the 15th Conference on Natural Language Processing, {KONVENS} 2019, Erlangen,
             Germany, October 9-11, 2019},
pages     = {1--9},
year      = {2019},
url       = {https://corpora.linguistik.uni-erlangen.de/data/konvens/proceedings/papers/KONVENS2019_paper_4.pdf}
}


Multimodal Datasets of the Berlin State Library
2nd Conference on Language, Data and Knowledge (LDK2019)
20-23 May 2019, Leipzig, Germany
📕 CEUR-WS vol. 2402, pp. 34–39. 2019.
💾 zenodo.org/communities/stabi

@inproceedings{DBLP:conf/ldk/Zellhofer19,
author    = {David Zellh{\"{o}}fer},
editor    = {Thierry Declerck and John P. McCrae},
title     = {Multimodal Datasets of the {Berlin State Library}},
booktitle = {Proceedings of the Poster Session of the 2nd Conference on Language,
             Data and Knowledge, {LDK} 2019, Leipzig, Germany, May 21, 2019},
pages     = {34--39},
year      = {2019},
url       = {http://ceur-ws.org/Vol-2402/paper7.pdf}
}