OvertureMaps/data

pyarrow.lib.ArrowInvalid: Map keys must be annotated as required.

Closed this issue · 1 comment

I am getting the error
pyarrow.lib.ArrowInvalid: Map keys must be annotated as required.
when parsing the "connector" and "segment" Parquet files with PyArrow. The map key in these files is indeed not flagged as required.
The relevant discussion for PyArrow is here:
apache/arrow#37389

Apologies for the (very) delayed response on this, but it looks like sometime between when you asked and now, we started correctly writing out the map key as required wherever it is used.

❯ parq ./part-00045-79e7d2b8-990d-4f21-81d8-8fcacb35c9e8-c000.zstd.parquet --schema

 # Schema
 <pyarrow._parquet.ParquetSchema object at 0x11f089cc0>
required group field_id=-1 spark_schema {
  optional binary field_id=-1 id (String);
  optional binary field_id=-1 geometry;
  optional group field_id=-1 bbox {
    optional float field_id=-1 xmin;
    optional float field_id=-1 xmax;
    optional float field_id=-1 ymin;
    optional float field_id=-1 ymax;
  }
  optional int32 field_id=-1 version;
  optional binary field_id=-1 update_time (String);
  optional group field_id=-1 sources (List) {
    repeated group field_id=-1 list {
      optional group field_id=-1 element {
        optional binary field_id=-1 property (String);
        optional binary field_id=-1 dataset (String);
        optional binary field_id=-1 record_id (String);
        optional double field_id=-1 confidence;
      }
    }
  }
  optional binary field_id=-1 subtype (String);
  optional group field_id=-1 names {
    optional binary field_id=-1 primary (String);
    optional group field_id=-1 common (Map) {
      repeated group field_id=-1 key_value {
        required binary field_id=-1 key (String);
        optional binary field_id=-1 value (String);
      }
    }
    optional group field_id=-1 rules (List) {
      repeated group field_id=-1 list {
        optional group field_id=-1 element {
          optional binary field_id=-1 variant (String);
          optional binary field_id=-1 language (String);
          optional binary field_id=-1 value (String);
          optional group field_id=-1 between (List) {
            repeated group field_id=-1 list {
              optional double field_id=-1 element;
            }
          }
          optional binary field_id=-1 side (String);
        }
      }
    }
  }
  optional binary field_id=-1 class (String);
  optional group field_id=-1 connector_ids (List) {
    repeated group field_id=-1 list {
      optional binary field_id=-1 element (String);
    }
  }
 ...
}