HDFGroup/hdf5-json

Investigate the source of roundtrip JSON -> HDF5 -> JSON difference

Opened this issue · 13 comments

This is the input HDF5/JSON:

{
    "apiVersion": "1.0.0",
    "datasets": {
        "6497e74e-6e8e-4290-bee9-535f5a66f665": {
            "alias": [
                "/a"
            ],
            "attributes": [
                {
                    "creationProperties": {
                        "nameCharEncoding": "H5T_CSET_UTF8"
                    },
                    "description": "",
                    "id": "d2922e88-a7c1-4013-bce5-b6f2f78baa36",
                    "name": "DIMENSION_LIST",
                    "shape": {
                        "class": "H5S_SIMPLE",
                        "dims": [
                            1
                        ],
                        "maxdims": [
                            1
                        ]
                    },
                    "type": {
                        "base": {
                            "base": "H5T_STD_REF_OBJ",
                            "class": "H5T_REFERENCE"
                        },
                        "class": "H5T_VLEN"
                    },
                    "value": [
                        [
                            "datasets/edd5c2fe-db6d-4916-9e68-069c4df80005"
                        ]
                    ]
                }
            ],
            "creationProperties": {
                "layout": {
                    "class": "H5D_CONTIGUOUS"
                }
            },
            "description": "",
            "shape": {
                "class": "H5S_SIMPLE",
                "dims": [
                    0,
                    3
                ],
                "maxdims": [
                    "H5S_UNLIMITED",
                    3
                ]
            },
            "type": {
                "base": "H5T_IEEE_F32LE",
                "class": "H5T_FLOAT"
            }
        },
        "edd5c2fe-db6d-4916-9e68-069c4df80005": {
            "alias": [
                "/m"
            ],
            "attributes": [
                {
                    "creationProperties": {
                        "nameCharEncoding": "H5T_CSET_UTF8"
                    },
                    "description": "",
                    "id": "3d1ad24b-514c-41d8-b019-fe89933c7505",
                    "name": "CLASS",
                    "shape": {
                        "class": "H5S_SCALAR"
                    },
                    "type": {
                        "charSet": "H5T_CSET_ASCII",
                        "class": "H5T_STRING",
                        "length": 16,
                        "strPad": "H5T_STR_NULLTERM"
                    },
                    "value": "DIMENSION_SCALE"
                },
                {
                    "creationProperties": {
                        "nameCharEncoding": "H5T_CSET_UTF8"
                    },
                    "description": "",
                    "id": "dee3c381-0f35-4d84-8b34-71aa8c8deb21",
                    "name": "REFERENCE_LIST",
                    "shape": {
                        "class": "H5S_SIMPLE",
                        "dims": [
                            1
                        ],
                        "maxdims": [
                            1
                        ]
                    },
                    "type": {
                        "class": "H5T_COMPOUND",
                        "fields": [
                            {
                                "name": "dataset",
                                "type": {
                                    "base": "H5T_STD_REF_OBJ",
                                    "class": "H5T_REFERENCE"
                                }
                            },
                            {
                                "name": "index",
                                "type": {
                                    "base": "H5T_STD_I32LE",
                                    "class": "H5T_INTEGER"
                                }
                            }
                        ]
                    },
                    "value": [
                        [
                            "datasets/6497e74e-6e8e-4290-bee9-535f5a66f665",
                            0
                        ]
                    ]
                }
            ],
            "creationProperties": {
                "layout": {
                    "class": "H5D_CONTIGUOUS"
                }
            },
            "description": "",
            "shape": {
                "class": "H5S_SIMPLE",
                "dims": [
                    0
                ],
                "maxdims": [
                    "H5S_UNLIMITED"
                ]
            },
            "type": {
                "base": "H5T_IEEE_F32LE",
                "class": "H5T_FLOAT"
            }
        }
    },
    "groups": {
        "31c7d987-47ce-4a03-92f7-2d4b3f0e5fb5": {
            "alias": [
                "/"
            ],
            "attributes": [],
            "description": "Group: /",
            "links": [
                {
                    "class": "H5L_TYPE_HARD",
                    "collection": "datasets",
                    "id": "6497e74e-6e8e-4290-bee9-535f5a66f665",
                    "title": "a"
                },
                {
                    "class": "H5L_TYPE_HARD",
                    "collection": "datasets",
                    "id": "edd5c2fe-db6d-4916-9e68-069c4df80005",
                    "title": "m"
                }
            ]
        }
    },
    "id": "31c7d987-47ce-4a03-92f7-2d4b3f0e5fb5",
    "root": "31c7d987-47ce-4a03-92f7-2d4b3f0e5fb5"
}

The "DIMENSION_LIST" attribute of dataset '/a' looks like the following in the input file above:

{
                    "creationProperties": {
                        "nameCharEncoding": "H5T_CSET_UTF8"
                    },
                    "description": "",
                    "id": "d2922e88-a7c1-4013-bce5-b6f2f78baa36",
                    "name": "DIMENSION_LIST",
                    "shape": {
                        "class": "H5S_SIMPLE",
                        "dims": [
                            1
                        ],
                        "maxdims": [
                            1
                        ]
                    },
                    "type": {
                        "base": {
                            "base": "H5T_STD_REF_OBJ",
                            "class": "H5T_REFERENCE"
                        },
                        "class": "H5T_VLEN"
                    },
                    "value": [
                        [
                            "datasets/edd5c2fe-db6d-4916-9e68-069c4df80005"
                        ]
                    ]
                }

The same attribute after converting to HDF5 and back to JSON:

{
                    "name": "DIMENSION_LIST", 
                    "shape": {
                        "class": "H5S_SIMPLE", 
                        "dims": [
                            2
                        ]
                    }, 
                    "type": {
                        "base": {
                            "base": "H5T_STD_REF_OBJ", 
                            "class": "H5T_REFERENCE"
                        }, 
                        "class": "H5T_VLEN"
                    }, 
                    "value": [
                        [
                            "datasets/bb44ec80-5fbd-11e5-a1db-3c15c2da029e"
                        ], 
                        []
                    ]
                }

It looks like the dataspace got extended by 1 and a null value added to the end.
This is happening on conversion to hdf5. From output of h5dump:

ATTRIBUTE "DIMENSION_LIST" {
         DATATYPE  H5T_VLEN { H5T_REFERENCE { H5T_STD_REF_OBJECT }}
         DATASPACE  SIMPLE { ( 2 ) / ( 2 ) }
         DATA {
         (0): (DATASET 2438 /m ), ()
         }
      }

I'll look into this.

What's up with the 'description' key? That is not in the JSON spec.

Also, I have a question about creationProperties. Attribute creationProperties are not supported currently in jsontoh5.py. I'll open a separate issue about this.

> What's up with the 'description' key? That is not in the JSON spec.

It is an extra key added for Product Designer. A user can add textual description to any element.

I think this is working as designed... Since the dataset the dimension scale is attached to is 2-D, its dimension list has two placeholders, one per dimension. If only one scale is attached, the other spot remains empty.

If you run the following Python sample:

import h5py
import numpy as np

f = h5py.File("mydimscale2d.h5", "w")
dset = f.create_dataset('temperatures', (10,10), dtype='f')
f.create_dataset('scale_x', data=np.arange(10)*10e3)
dset.dims.create_scale(f['scale_x'], "x axis")
dset.dims[0].attach_scale(f['scale_x'])
f.close()

and then look at the h5dump output, you'll see there's an empty element in the DIMENSION_LIST.

@ajelenak-thg - what do you think?

I agree. It makes sense to have a dimension list placeholder for each dataset dimension even when it is not used.

Were there any other roundtrip differences in the JSON above that should be investigated?

Not sure, perhaps Joe Lee can chime in. (Cannot @mention him in this repo?)

I don't see any other difference. Can you fix jsontoh5 to allow extra [] in DIMENSION_LIST value so that HPD server doesn't throw the following error message?

hpdws.publish():500
{"link": [{"href": "https://hpd-ws.herokuapp.com/template/ec06b81c-4538-48ad-b152-e905adc7f633/publish", "rel": "retry", "title": "Template file"}, {"href": "https://hpd-ws.herokuapp.com/entity/ec06b81c-4538-48ad-b152-e905adc7f633", "rel": "version", "title": "test_unlmited3 entity version HEAD"}, {"href": "https://hpd-ws.herokuapp.com/project/Joe", "rel": "project", "title": "Project Joe"}], "message": "[Errno 22] Invalid dimension list value"}

Also, should HPD server throw an error if DIMENSION_LIST size doesn't match the rank of dataset?

@hyoklee - I'm not sure what change you are asking for in h5tojson... currently the h5 output of DIMENSION_LIST will have n elements (where n is the rank of the parent dataset), regardless of the number of values in the dimension list.

For the record, this is the HDF5/JSON that causes the error reported in Joe's comment.

{
    "apiVersion": "1.0.0",
    "datasets": {
        "fd252e22-6231-11e5-b806-ec814d819d13": {
            "alias": [
                "/a"
            ],
            "attributes": [
                {
                    "creationProperties": {
                        "nameCharEncoding": "H5T_CSET_UTF8"
                    },
                    "description": "",
                    "id": "410885b7-7bef-47a3-9c32-8bfcb1aa7340",
                    "name": "DIMENSION_LIST",
                    "shape": {
                        "class": "H5S_SIMPLE",
                        "dims": [
                            2
                        ],
                        "maxdims": [
                            2
                        ]
                    },
                    "type": {
                        "base": {
                            "base": "H5T_STD_REF_OBJ",
                            "class": "H5T_REFERENCE"
                        },
                        "class": "H5T_VLEN"
                    },
                    "value": [
                        [
                            "datasets/fd252e24-6231-11e5-8986-ec814d819d13"
                        ],
                        []
                    ]
                }
            ],
            "creationProperties": {
                "fillValue": 0.0,
                "layout": {
                    "class": "H5D_CHUNKED",
                    "dims": [
                        512,
                        3
                    ]
                }
            },
            "description": "",
            "shape": {
                "class": "H5S_SIMPLE",
                "dims": [
                    0,
                    3
                ],
                "maxdims": [
                    "H5S_UNLIMITED",
                    3
                ]
            },
            "type": {
                "base": "H5T_IEEE_F32LE",
                "class": "H5T_FLOAT"
            }
        },
        "fd252e23-6231-11e5-8cdb-ec814d819d13": {
            "alias": [
                "/b"
            ],
            "attributes": [
                {
                    "creationProperties": {
                        "nameCharEncoding": "H5T_CSET_UTF8"
                    },
                    "description": "",
                    "id": "f7e49a91-5480-4849-ac46-c67a342aaec7",
                    "name": "DIMENSION_LIST",
                    "shape": {
                        "class": "H5S_SIMPLE",
                        "dims": [
                            2
                        ],
                        "maxdims": [
                            2
                        ]
                    },
                    "type": {
                        "base": {
                            "base": "H5T_STD_REF_OBJ",
                            "class": "H5T_REFERENCE"
                        },
                        "class": "H5T_VLEN"
                    },
                    "value": [
                        [
                            "datasets/fd252e25-6231-11e5-8996-ec814d819d13"
                        ],
                        []
                    ]
                }
            ],
            "creationProperties": {
                "fillValue": 0.0,
                "layout": {
                    "class": "H5D_CHUNKED",
                    "dims": [
                        128,
                        32
                    ]
                }
            },
            "description": "",
            "shape": {
                "class": "H5S_SIMPLE",
                "dims": [
                    0,
                    128
                ],
                "maxdims": [
                    "H5S_UNLIMITED",
                    128
                ]
            },
            "type": {
                "base": "H5T_IEEE_F32LE",
                "class": "H5T_FLOAT"
            }
        },
        "fd252e24-6231-11e5-8986-ec814d819d13": {
            "alias": [
                "/m"
            ],
            "attributes": [
                {
                    "creationProperties": {
                        "nameCharEncoding": "H5T_CSET_UTF8"
                    },
                    "description": "",
                    "id": "c3b3d62d-432b-47de-862b-70b68e5499ea",
                    "name": "CLASS",
                    "shape": {
                        "class": "H5S_SCALAR"
                    },
                    "type": {
                        "charSet": "H5T_CSET_ASCII",
                        "class": "H5T_STRING",
                        "length": 16,
                        "strPad": "H5T_STR_NULLTERM"
                    },
                    "value": "DIMENSION_SCALE"
                },
                {
                    "creationProperties": {
                        "nameCharEncoding": "H5T_CSET_UTF8"
                    },
                    "description": "",
                    "id": "e5c31c94-d1b4-440f-ae8d-e6de15d80dd8",
                    "name": "REFERENCE_LIST",
                    "shape": {
                        "class": "H5S_SIMPLE",
                        "dims": [
                            1
                        ],
                        "maxdims": [
                            1
                        ]
                    },
                    "type": {
                        "class": "H5T_COMPOUND",
                        "fields": [
                            {
                                "name": "dataset",
                                "type": {
                                    "base": "H5T_STD_REF_OBJ",
                                    "class": "H5T_REFERENCE"
                                }
                            },
                            {
                                "name": "index",
                                "type": {
                                    "base": "H5T_STD_I32LE",
                                    "class": "H5T_INTEGER"
                                }
                            }
                        ]
                    },
                    "value": [
                        [
                            "datasets/fd252e22-6231-11e5-b806-ec814d819d13",
                            0
                        ]
                    ]
                }
            ],
            "creationProperties": {
                "fillValue": 0.0,
                "layout": {
                    "class": "H5D_CHUNKED",
                    "dims": [
                        1024
                    ]
                }
            },
            "description": "",
            "shape": {
                "class": "H5S_SIMPLE",
                "dims": [
                    0
                ],
                "maxdims": [
                    "H5S_UNLIMITED"
                ]
            },
            "type": {
                "base": "H5T_IEEE_F32LE",
                "class": "H5T_FLOAT"
            }
        },
        "fd252e25-6231-11e5-8996-ec814d819d13": {
            "alias": [
                "/n"
            ],
            "attributes": [
                {
                    "creationProperties": {
                        "nameCharEncoding": "H5T_CSET_UTF8"
                    },
                    "description": "",
                    "id": "0589b03a-9d61-45e5-bc36-4e8a45337e0f",
                    "name": "CLASS",
                    "shape": {
                        "class": "H5S_SCALAR"
                    },
                    "type": {
                        "charSet": "H5T_CSET_ASCII",
                        "class": "H5T_STRING",
                        "length": 16,
                        "strPad": "H5T_STR_NULLTERM"
                    },
                    "value": "DIMENSION_SCALE"
                },
                {
                    "creationProperties": {
                        "nameCharEncoding": "H5T_CSET_UTF8"
                    },
                    "description": "",
                    "id": "41866844-f20c-4131-846b-aeb4bc6e5846",
                    "name": "REFERENCE_LIST",
                    "shape": {
                        "class": "H5S_SIMPLE",
                        "dims": [
                            1
                        ],
                        "maxdims": [
                            1
                        ]
                    },
                    "type": {
                        "class": "H5T_COMPOUND",
                        "fields": [
                            {
                                "name": "dataset",
                                "type": {
                                    "base": "H5T_STD_REF_OBJ",
                                    "class": "H5T_REFERENCE"
                                }
                            },
                            {
                                "name": "index",
                                "type": {
                                    "base": "H5T_STD_I32LE",
                                    "class": "H5T_INTEGER"
                                }
                            }
                        ]
                    },
                    "value": [
                        [
                            "datasets/fd252e23-6231-11e5-8cdb-ec814d819d13",
                            0
                        ]
                    ]
                }
            ],
            "creationProperties": {
                "fillValue": 0.0,
                "layout": {
                    "class": "H5D_CHUNKED",
                    "dims": [
                        1024
                    ]
                }
            },
            "description": "",
            "shape": {
                "class": "H5S_SIMPLE",
                "dims": [
                    0
                ],
                "maxdims": [
                    "H5S_UNLIMITED"
                ]
            },
            "type": {
                "base": "H5T_IEEE_F32LE",
                "class": "H5T_FLOAT"
            }
        }
    },
    "groups": {
        "ec06b81c-4538-48ad-b152-e905adc7f633": {
            "alias": [
                "/"
            ],
            "attributes": [],
            "description": "Group: /",
            "links": [
                {
                    "class": "H5L_TYPE_HARD",
                    "collection": "datasets",
                    "id": "fd252e22-6231-11e5-b806-ec814d819d13",
                    "title": "a"
                },
                {
                    "class": "H5L_TYPE_HARD",
                    "collection": "datasets",
                    "id": "fd252e23-6231-11e5-8cdb-ec814d819d13",
                    "title": "b"
                },
                {
                    "class": "H5L_TYPE_HARD",
                    "collection": "datasets",
                    "id": "fd252e24-6231-11e5-8986-ec814d819d13",
                    "title": "m"
                },
                {
                    "class": "H5L_TYPE_HARD",
                    "collection": "datasets",
                    "id": "fd252e25-6231-11e5-8996-ec814d819d13",
                    "title": "n"
                }
            ]
        }
    },
    "id": "ec06b81c-4538-48ad-b152-e905adc7f633",
    "root": "ec06b81c-4538-48ad-b152-e905adc7f633"
}

@hyoklee

> should HPD server throw an error if DIMENSION_LIST size doesn't match the rank of dataset?

What is the JSON the app sends to the server when the DIMENSION_LIST attribute is created? What JSON is sent when an additional dimension scale is attached to the dataset?

created:
"value": [[ "datasets/fd252e24-6231-11e5-8986-ec814d819d13"]]

attached:
"value": [[ "datasets/fd252e24-6231-11e5-8986-ec814d819d13"], []]

If I remove [] manually using HPD, I can create the same template file.

Closing as not relevant any more.