a patch to speed up data processing

Question

a patch to speed up data processing

Opened this issue a year ago · 1 comment

In `traindata_aligned()` in VAME/vame/model/create_training.py, the following block:

        # Robust mode: blank out extreme values of X_z by replacing them with NaN.
        # Runs only when cfg['robust'] compares equal to True.
        if cfg['robust'] == True:
            # Single IQR value over X_z (presumably scipy.stats.iqr -- the
            # import is not shown in this excerpt; confirm).
            iqr_val = iqr(X_z)
            print("IQR value: %.2f, IQR cutoff: %.2f" %(iqr_val, cfg['iqr_factor']*iqr_val))
            # Visit every element of the 2-D array one at a time in Python;
            # this nested loop is the slow path the issue is about.
            for i in range(X_z.shape[0]):
                for marker in range(X_z.shape[1]):
                    # Above the positive cutoff -> mark as missing.
                    if X_z[i,marker] > cfg['iqr_factor']*iqr_val:
                        X_z[i,marker] = np.nan
                        
                    # Below the negative cutoff -> mark as missing.
                    elif X_z[i,marker] < -cfg['iqr_factor']*iqr_val:
                        X_z[i,marker] = np.nan

can be sped up by replacing the nested loops with a single vectorized assignment:

        # Robust mode: replace every entry of X_z whose magnitude exceeds
        # iqr_factor * IQR with NaN, using one vectorized mask instead of
        # visiting elements individually.
        if cfg['robust'] == True:
            iqr_val = iqr(X_z)
            # Compute the cutoff once; it is reused for the log line and the mask.
            cutoff = cfg['iqr_factor'] * iqr_val
            print("IQR value: %.2f, IQR cutoff: %.2f" % (iqr_val, cutoff))
            # |x| > cutoff is the same test as (x > cutoff) | (x < -cutoff).
            X_z[np.abs(X_z) > cutoff] = np.nan

Answer 1 · 2023-05-30T05:02:43.000Z

Also in `traindata_aligned()` in VAME/vame/model/create_training.py, the following block:

    # Find the two columns of X with the smallest standard deviation (called
    # "anchors" here), remove them from X, and transpose the result.
    # Standard deviation of each column of X (computed row-wise on X.T).
    detect_anchors = np.std(X.T, axis=1)
    sort_anchors = np.sort(detect_anchors)
    # Two lowest values are tied: take the first two column indices that
    # equal the minimum.
    if sort_anchors[0] == sort_anchors[1]:
        anchors = np.where(detect_anchors == sort_anchors[0])[0]
        anchor_1_temp = anchors[0]
        anchor_2_temp = anchors[1]
        
    else:
        # Look up the column index of the lowest and second-lowest value.
        # NOTE(review): int(...) presumably requires exactly one match; a tie
        # for the second-lowest value would yield several indices -- confirm
        # how that case should be handled.
        anchor_1_temp = int(np.where(detect_anchors == sort_anchors[0])[0])
        anchor_2_temp = int(np.where(detect_anchors == sort_anchors[1])[0])
    
    # Order the pair so that anchor_1 is the larger index and anchor_2 the
    # smaller one.
    if anchor_1_temp > anchor_2_temp:
        anchor_1 = anchor_1_temp
        anchor_2 = anchor_2_temp
        
    else:
        anchor_1 = anchor_2_temp
        anchor_2 = anchor_1_temp
    
    # Delete the larger column index first so the smaller index still refers
    # to the intended column for the second delete.
    X = np.delete(X, anchor_1, 1)
    X = np.delete(X, anchor_2, 1)
    
    X = X.T

can be sped up as follows:

# Drop the two columns of X with the smallest standard deviation (the
# "anchors") and transpose the result.
detect_anchors = np.std(X, axis=0)
# A stable sort resolves ties in favour of the lowest column indices, which
# matches the np.where-based tie handling of the code this replaces.
anchor_idx = np.argsort(detect_anchors, kind="stable")[:2]
# np.delete keeps the remaining columns in their original order; indexing
# with the sorted indices (X[:, indsort[2:]]) would reorder them by std.
# The trailing transpose is kept so downstream code sees the same layout
# as before.
X = np.delete(X, anchor_idx, axis=1).T