jbusecke/xarrayutils

Extend checks for `file_exist_check`

jbusecke opened this issue · 0 comments

In another project, I implemented a check that verifies whether all chunks of a zarr store were written:

def _check_zarr_complete(store):
    """Return True if every array in the zarr group at `store` is fully written.

    The group is opened with ``zarr.open_group(str(store))``. For each array
    yielded by the group's ``arrays()`` method, the number of initialized
    chunks is compared with the total number of chunks. The name of every
    array with fewer initialized chunks than total chunks is printed.

    Parameters
    ----------
    store
        Location of the zarr group; it is converted with ``str`` before
        being passed to ``zarr.open_group``.

    Returns
    -------
    bool
        False if at least one array has fewer initialized chunks than total
        chunks, True otherwise (including when no arrays are found).
    """
    zg = zarr.open_group(str(store))
    complete = True
    # `arrays()` yields (name, array) pairs, so no second lookup by name is
    # needed.
    for name, array in zg.arrays():
        # Read the counts from the array attributes instead of parsing the
        # "Chunks initialized" entry of the human-readable info report.
        n_missing = array.nchunks - array.nchunks_initialized
        # I had a case with 3/1 chunks initialized...not sure where that was
        # from. This was a string dimension variable in the case I
        # encountered.
        # TODO: Find out under which circumstances this could happen and if
        # >0 is ok as criterion.
        if n_missing > 0:
            complete = False
            print(f'{name} not fully written')
    return complete

This could be implemented in `file_exist_check`.