/vfbr

Verfachbuchregister-App

Primary LanguagePythonMIT LicenseMIT

Verfachbuchregister-App

A djangobaseproject based web application to publish summaries of VerfachbĂĽcher from

  • Stadtgericht Bruneck
  • Oberamtsgericht Bruneck
  • Landgericht St. Michaelsburg

for the years 1750-1800 created by Michael Prokosch and Michael Span in the context of the FWF-funded project Reading in the Alps.

docker

building the image

  • docker build -t vfbr:latest .
  • docker build -t vfbr:latest --no-cache .

running the image

To run the image you should provide an .env file to pass in needed environment variables; see example below:

  • docker run -it -p 8020:8020 --env-file env.secret --name vfbr vfbr:latest

enrich workflow

train custom word vectors

python -m prodigy terms.train-vectors vfbr_vecs "http://127.0.0.1:8000/api/vfb-entry/?format=json::vollregest::10" --loader from_drf --lang de

adm_types

python -m prodigy ner.make-gold vfbr vfbr_adm_model http://127.0.0.1:8000/api/vfb-entry/?format=json::vollregest::10 --loader from_drf --label ADM-TYPE -U

vfbr_pers

python -m prodigy ner.make-gold vfbr_persons vfbr_vecs "http://127.0.0.1:8000/api/vfb-entry/?format=json::vollregest::10" --loader from_drf --label PERS -U

python -m prodigy ner.make-gold vfbr_persons vfbr_vecs "http://127.0.0.1:8000/api/vfb-entry/?format=json::vollregest::10" --loader from_drf --label PERS -U

vfbr_place

python -m prodigy ner.make-gold vfbr_places vfbr_vecs "http://127.0.0.1:8000/api/persons/?format=json::legacy_id::10" --loader from_drf --label PLACE -U

python -m prodigy ner.make-gold vfbr_places vfbr_places_model "http://127.0.0.1:8000/api/persons/?format=json::legacy_id::10" --loader from_drf --label PLACE -U

vfbr_jobs

prodigy dataset vfbr_jobs

python -m prodigy ner.make-gold vfbr_jobs vfbr_vecs "http://127.0.0.1:8000/api/persons/?format=json::legacy_id::10" --loader from_drf --label JOB -U

python -m prodigy ner.make-gold vfbr_jobs vfbr_jobs_model "http://127.0.0.1:8000/api/persons/?format=json::legacy_id::10" --loader from_drf --label JOB -U

python -m prodigy ner.batch-train vfbr_jobs vfbr_vecs --output vfbr_jobs_model -U --no-missing

teach NN / VN

prodigy ner.make-gold vfbr_nn vfbr_persons_vecs "http://127.0.0.1:8000/api/persons/?format=json::written_name::10" --loader from_drf --label NN -U

prodigy ner.make-gold vfbr_nn vfbr_persons_vecs "http://127.0.0.1:8000/api/persons/?format=json&name=Zwischenpruggerin::written_name::10" --loader from_drf --label NN -U

prodigy ner.make-gold vfbr_nn vfbr_persons_vecs "http://127.0.0.1:8000/api/persons/?format=json&ordering=name::written_name::10" --loader from_drf --label NN -U

prodigy ner.batch-train vfbr_nn vfbr_persons_vecs --output vfbr_nn_model -U --no-missing

teach VN

prodigy ner.make-gold vfbr_nn vfbr_vn_model "http://127.0.0.1:8000/api/persons/?format=json::written_name::10" --loader from_drf --label VN -U

teach terms

python -m prodigy terms.teach drf vfrb_vecs --seeds seeds.txt

useless?

train

python -m prodigy ner.batch-train vfbr_persons vfbr_vecs --output vfbr_persons_model -U --no-missing

python -m prodigy ner.batch-train vfbr_places vfbr_vecs --output vfbr_places_model -U --no-missing

teach

python -m prodigy ner.teach vfbr_quick vfbr_adm_model http://127.0.0.1:8000/api/vfb-entry/?format=json::vollregest::10 --loader from_drf -U

dedupe

pip install csvdedupe

mkdir dedupe

cd dedupe

wget -O data.csv  "http://127.0.0.1:8000/entities/person/?columns=forename&columns=name&name=&forename=&written_name=&gender=m%C3%A4nnlich&Filter=Search&_export=csv"

csvdedupe data.csv  --field_names "Umfassende Namensansetzung" Vorname Name --output_file output.csv

serialize VfbEntry to TEI

python manage.py vfbr_to_tei --settings=...