OCR-D/ocrd_all

no word coordinates?

Closed this issue · 6 comments

With this workflow, I don't get word coordinates in the XML output:

singocrd ocrd workspace init
singocrd ocrd workspace add -g P_00001 -G OCR-D-IMG -i OCR-D-IMG_00001 -m image/tiff OCR-D-IMG/00001.tif
singocrd ocrd-sbb-binarize -P model default-2021-03-09 -I OCR-D-IMG -O OCR-D-001
singocrd ocrd-anybaseocr-crop -I OCR-D-001 -O OCR-D-002
singocrd ocrd-olena-binarize -P impl wolf -P k 0.10 -I OCR-D-002 -O OCR-D-003
singocrd ocrd-cis-ocropy-deskew -P level-of-operation page -I OCR-D-003 -O OCR-D-004
singocrd ocrd-tesserocr-segment -P find_tables true -P shrink_polygons true -I OCR-D-004 -O OCR-D-005
singocrd ocrd-calamari-recognize -P checkpoint_dir $HOME/ocrd_models/ocrd-calamari-recognize/qurator-gt4histocr-1.0 -I OCR-D-005 -O OCR-D-OCR

XML output (somewhat stripped and unicode characters quoted):

<?xml version="1.0" encoding="UTF-8"?>
<PcGts xmlns:pc="http://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15
►" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http
►://schema.primaresearch.org/PAGE/gts/pagecontent/2019-07-15 http://schema.
►primaresearch.org/PAGE/gts/pagecontent/2019-07-15/pagecontent.xsd" pcGtsId="OCR-
►D-OCR_00001">
  <Metadata>
    <Creator>OCR-D/core 2.63.0</Creator>
    <Created>2024-03-01T16:02:25.679686</Created>
    <LastChange>2024-03-01T16:03:37</LastChange>
    <MetadataItem type="processingStep" name="preprocessing/optimization/
►binarization" value="ocrd-sbb-binarize">
      <Labels externalModel="ocrd-tool" externalId="parameters">
        <Label value="default-2021-03-09" type="model"/>
        <Label value="page" type="operation_level"/>
      </Labels>
      <Labels externalModel="ocrd-tool" externalId="version">
        <Label value="0.1.0" type="ocrd-sbb-binarize"/>
        <Label value="2.63.0" type="ocrd/core"/>
      </Labels>
    </MetadataItem>
    <MetadataItem type="processingStep" name="preprocessing/optimization/
►cropping" value="ocrd-anybaseocr-crop">
      <Labels externalModel="ocrd-tool" externalId="parameters">
        <Label value="0" type="dpi"/>
        <Label value="50.0" type="rulerRatioMax"/>
        <Label value="3.0" type="rulerRatioMin"/>
        <Label value="0.3" type="rulerAreaMax"/>
        <Label value="0.01" type="rulerAreaMin"/>
        <Label value="0.95" type="rulerWidthMax"/>
        <Label value="0.05" type="columnAreaMin"/>
        <Label value="0.04" type="columnSepWidthMax"/>
        <Label value="0.25" type="marginTop"/>
        <Label value="0.75" type="marginBottom"/>
        <Label value="0.3" type="marginLeft"/>
        <Label value="0.7" type="marginRight"/>
        <Label value="10" type="padding"/>
      </Labels>
      <Labels externalModel="ocrd-tool" externalId="version">
        <Label value="1.10.0" type="ocrd-anybaseocr-crop"/>
        <Label value="2.63.0" type="ocrd/core"/>
      </Labels>
    </MetadataItem>
    <MetadataItem type="processingStep" name="preprocessing/optimization/
►binarization" value="ocrd-olena-binarize">
      <Labels>
        <Label value="0" type="win-size"/>
        <Label value="wolf" type="impl"/>
        <Label value="0" type="dpi"/>
        <Label value="0.1" type="k"/>
      </Labels>
    </MetadataItem>
    <MetadataItem type="processingStep" name="preprocessing/optimization/
►deskewing" value="ocrd-cis-ocropy-deskew">
      <Labels externalModel="ocrd-tool" externalId="parameters">
        <Label value="page" type="level-of-operation"/>
        <Label value="5.0" type="maxskew"/>
      </Labels>
      <Labels externalModel="ocrd-tool" externalId="version">
        <Label value="0.1.5" type="ocrd-cis-ocropy-deskew"/>
        <Label value="2.63.0" type="ocrd/core"/>
      </Labels>
    </MetadataItem>
    <MetadataItem type="processingStep" name="layout/segmentation/region" value
►="ocrd-tesserocr-segment">
      <Labels externalModel="ocrd-tool" externalId="parameters">
        <Label value="True" type="find_tables"/>
        <Label value="True" type="shrink_polygons"/>
        <Label value="0" type="dpi"/>
        <Label value="4" type="padding"/>
        <Label value="False" type="block_polygons"/>
        <Label value="False" type="find_staves"/>
        <Label value="False" type="sparse_text"/>
        <Label value="True" type="overwrite_segments"/>
        <Label value="region" type="segmentation_level"/>
        <Label value="none" type="textequiv_level"/>
        <Label value="True" type="overwrite_text"/>
        <Label value="False" type="raw_lines"/>
        <Label value="" type="char_whitelist"/>
        <Label value="" type="char_blacklist"/>
        <Label value="" type="char_unblacklist"/>
        <Label value="{}" type="tesseract_parameters"/>
        <Label value="{}" type="xpath_parameters"/>
        <Label value="{}" type="xpath_model"/>
        <Label value="False" type="auto_model"/>
        <Label value="DEFAULT" type="oem"/>
      </Labels>
      <Labels externalModel="ocrd-tool" externalId="version">
        <Label value="0.18.0 (tesseract 5.3.4)" type="ocrd-tesserocr-segment"/>
        <Label value="2.63.0" type="ocrd/core"/>
      </Labels>
    </MetadataItem>
    <MetadataItem type="processingStep" name="recognition/text-recognition"
► value="ocrd-calamari-recognize">
      <Labels externalModel="ocrd-tool" externalId="parameters">
        <Label value="/home/hd/hd_hd/hd_wu120/ocrd_models/ocrd-calamari-
►recognize/qurator-gt4histocr-1.0" type="checkpoint_dir"/>
        <Label value="confidence_voter_default_ctc" type="voter"/>
        <Label value="line" type="textequiv_level"/>
        <Label value="0.001" type="glyph_conf_cutoff"/>
      </Labels>
      <Labels externalModel="ocrd-tool" externalId="version">
        <Label value="1.0.6 (calamari 1.0.6, tensorflow 2.13.1)" type="ocrd-
►calamari-recognize"/>
        <Label value="2.63.0" type="ocrd/core"/>
      </Labels>
    </MetadataItem>
  </Metadata>
  <Page imageFilename="OCR-D-IMG/00001.tif" imageWidth="2523" imageHeight="3173
►" orientation="0.">
    <AlternativeImage filename="OCR-D-001/OCR-D-001_00001.IMG-BIN.png" comments
►=",binarized"/>
    <AlternativeImage filename="OCR-D-002/OCR-D-002_00001.IMG-CROP.png" comments
►=",binarized,cropped"/>
    <AlternativeImage filename="OCR-D-003/OCR-D-IMG_00001-BIN_wolf.png" comments
►="cropped,binarized"/>
    <AlternativeImage filename="OCR-D-004/OCR-D-004_00001.IMG-DESKEW.png"
► comments=",binarized,cropped,deskewed"/>
    <AlternativeImage filename="OCR-D-005/OCR-D-005_00001.IMG-BIN.png" comments
►=",binarized,cropped,deskewed,binarized,clipped"/>
    <Border>
      <Coords />
    </Border>
    <ReadingOrder>
      <OrderedGroup id="reading-order">
        <RegionRefIndexed index="0" regionRef="region0000"/>
        <RegionRefIndexed index="1" regionRef="region0001"/>
        <RegionRefIndexed index="2" regionRef="region0002"/>
        <RegionRefIndexed index="3" regionRef="region0003"/>
        <RegionRefIndexed index="5" regionRef="region0005"/>
        <RegionRefIndexed index="6" regionRef="region0006"/>
        <RegionRefIndexed index="7" regionRef="region0007"/>
        <RegionRefIndexed index="8" regionRef="region0008"/>
        <RegionRefIndexed index="9" regionRef="region0009"/>
        <RegionRefIndexed index="10" regionRef="region0010"/>
      </OrderedGroup>
    </ReadingOrder>
    <TextRegion id="region0000" orientation="0.245354055365198" type="caption"
► readingDirection="left-to-right" textLineOrder="top-to-bottom">
      <Coords />
      <TextLine id="region0000_line0000">
        <Coords />
        <TextEquiv conf="0.999661564826965">
          <Unicode>den ſich unbrauchbar gewordene Geräthe genug auffinden, um</
►Unicode>
        </TextEquiv>
      </TextLine>
      <TextLine id="region0000_line0001">
        <Coords />
        <TextEquiv conf="0.999739110469818">
          <Unicode>für das archäologiſche Studium die nöthigen Anhaltspunkte zu
►</Unicode>
        </TextEquiv>
      </TextLine>
      <TextLine id="region0000_line0002">
        <Coords />
        <TextEquiv conf="0.999832928180695">
          <Unicode>geben. Was noch fehlt, läßt ſich durch Abgüſſe erſetzen. Was
►</Unicode>
        </TextEquiv>
      </TextLine>
      <TextLine id="region0000_line0003">
        <Coords />
        <TextEquiv conf="0.999873757362366">
          <Unicode>aber die noch brauchbaren Gefäße und Gewänder betrifft, ſo</
►Unicode>
        </TextEquiv>
      </TextLine>
      <TextLine id="region0000_line0004">
        <Coords />
        <TextEquiv conf="0.995914876461029">
          <Unicode>werden dieſe unter dem Einfluß der Vereine zweckmäßig re†2014
►‡</Unicode>
        </TextEquiv>
      </TextLine>
      <TextLine id="region0000_line0005">
        <Coords />
        <TextEquiv conf="0.999891519546509">
          <Unicode>ſtaurirt, und ihrer heiligen Beſtimmung erhalten bleiben,
► ohne</Unicode>
        </TextEquiv>
      </TextLine>
      <TextEquiv>
        <Unicode>den ſich unbrauchbar gewordene Geräthe genug auffinden, um
für das archäologiſche Studium die nöthigen Anhaltspunkte zu
geben. Was noch fehlt, läßt ſich durch Abgüſſe erſetzen. Was
aber die noch brauchbaren Gefäße und Gewänder betrifft, ſo
werden dieſe unter dem Einfluß der Vereine zweckmäßig re†2014‡
ſtaurirt, und ihrer heiligen Beſtimmung erhalten bleiben, ohne</Unicode>
      </TextEquiv>
    </TextRegion>

...

I'm trying again with -P textequiv_level word

According to the parameter self-documentation of ocrd-calamari-recognize, textequiv_level=line is the default, so yes, no words are to be expected (because any line-based recognizer will invalidate previous word/glyph segmentation).

Pass -P textequiv_level word (or glyph) to ocrd-calamari-recognize if you want word-level (or glyph-level) results.

> I'm trying again with -P textequiv_level word

Now you were faster :)

Hmmm

+ /home/hd/hd_hd/xxxxx/local/bin/time singularity exec --bind /scratch/xxxxx/job_2566614_o01n11:/tmp --bind .:/data --bind /home/hd/hd_hd/xxxxx/ocrd_models:/usr/local/share -e --env-file /home/hd/hd_hd/xxxxx/ocrd.env /home/hd/hd_hd/xxxxx/ocrd.sif ocrd-tesserocr-segment -P find_tables true -P shrink_polygons true -P textequiv_level word -I OCR-D-004 -O OCR-D-005
Traceback (most recent call last):
  File "/usr/local/bin/ocrd-tesserocr-segment", line 33, in <module>
    sys.exit(load_entry_point('ocrd-tesserocr', 'console_scripts', 'ocrd-
►tesserocr-segment')())
  File "/usr/local/lib/python3.8/site-packages/click/core.py", line 1157, in __
►call__
    return self.main(*args, **kwargs)
  File "/usr/local/lib/python3.8/site-packages/click/core.py", line 1078, in
► main
    rv = self.invoke(ctx)
  File "/usr/local/lib/python3.8/site-packages/click/core.py", line 1434, in
► invoke
    return ctx.invoke(self.callback, **ctx.params)
  File "/usr/local/lib/python3.8/site-packages/click/core.py", line 783, in
► invoke
    return __callback(*args, **kwargs)
  File "/build/ocrd_tesserocr/ocrd_tesserocr/cli.py", line 18, in ocrd_tesserocr
►_segment
    return ocrd_cli_wrap_processor(TesserocrSegment, *args, **kwargs)
  File "/usr/local/lib/python3.8/site-packages/ocrd/decorators/__init__.py",
► line 133, in ocrd_cli_wrap_processor
    run_processor(processorClass, mets_url=mets, workspace=workspace, **kwargs)
  File "/usr/local/lib/python3.8/site-packages/ocrd/processor/helpers.py", line
► 87, in run_processor
    processor = get_processor(
  File "/usr/local/lib/python3.8/site-packages/ocrd/processor/helpers.py", line
► 401, in get_processor
    return processor_class(
  File "/build/ocrd_tesserocr/ocrd_tesserocr/segment.py", line 15, in __init__
    super().__init__(*args, **kwargs)
  File "/build/ocrd_tesserocr/ocrd_tesserocr/recognize.py", line 130, in __init_
►_
    super().__init__(*args, **kwargs)
  File "/usr/local/lib/python3.8/site-packages/ocrd/processor/base.py", line 152
►, in __init__
    raise Exception("Invalid parameters %s" % report.errors)
Exception: Invalid parameters ["[] Additional properties are not allowed ('
►textequiv_level' was unexpected)"]

No, not for ocrd-tesserocr-segment — pass the parameter to ocrd-calamari-recognize!