Extend checks for `file_exist_check`
jbusecke opened this issue · 0 comments
jbusecke commented
In another project, I implemented a check that verifies whether all chunks of a zarr store have been written:
def _check_zarr_complete(store):
    """Return True if every array in the zarr group at ``store`` is fully written.

    Opens the group at ``str(store)`` and, for each array yielded by
    ``zg.arrays()``, compares the number of initialized chunks with the
    total number of chunks. The name of every array that is missing chunks
    is printed.

    Parameters
    ----------
    store : path-like
        Location of the zarr group; converted with ``str`` before opening.

    Returns
    -------
    bool
        False if at least one array has fewer initialized chunks than its
        total chunk count, True otherwise.
    """
    # NOTE(review): open_group is called with its default mode; consider
    # mode='r' so that a pure check can never create a store -- confirm
    # against callers before changing.
    zg = zarr.open_group(str(store))
    complete = True
    # arrays() yields (name, array) pairs, so the array does not have to be
    # looked up again by name.
    for name, array in zg.arrays():
        # Read the chunk counters directly instead of parsing the
        # 'Chunks initialized' line of the human-readable info report.
        missing = array.nchunks - array.nchunks_initialized
        # I had a case with 3/1 chunks initialized...not sure where that was
        # from...
        # TODO: Find out under which circumstances this could happen and if
        # >0 is ok as criterion.
        # This was a string dimension variable in the case I encountered.
        if missing > 0:
            complete = False
            print(f'{name} not fully written')
    return complete
This check could be implemented in `file_exist_check`.