bearpaw/pytorch-pose

Bug in crop method

pranavbudhwant opened this issue · 11 comments

Hi,
I was visualizing the heatmap inputs for the model, and I'm not sure what I'm doing wrong, but the crop method doesn't seem to work. This is the original crop method from this repo:

def crop(img, center, scale, res, rot=0):
    img = im_to_numpy(img)

    # Preprocessing for efficient cropping
    ht, wd = img.shape[0], img.shape[1]
    sf = scale * 200.0 / res[0]
    if sf < 2:
        sf = 1
    else:
        new_size = int(np.math.floor(max(ht, wd) / sf))
        new_ht = int(np.math.floor(ht / sf))
        new_wd = int(np.math.floor(wd / sf))
        if new_size < 2:
            return torch.zeros(res[0], res[1], img.shape[2]) \
                        if len(img.shape) > 2 else torch.zeros(res[0], res[1])
        else:
            img = cv2.resize(img, (new_ht,new_wd))
            #img = scipy.misc.imresize(img, [new_ht, new_wd])
            center = center * 1.0 / sf
            scale = scale / sf

    # Upper left point
    ul = np.array(transform([0, 0], center, scale, res, invert=1))
    # Bottom right point
    br = np.array(transform(res, center, scale, res, invert=1))

    # Padding so that when rotated proper amount of context is included
    pad = int(np.linalg.norm(br - ul) / 2 - float(br[1] - ul[1]) / 2)
    if not rot == 0:
        ul -= pad
        br += pad

    new_shape = [br[1] - ul[1], br[0] - ul[0]]
    if len(img.shape) > 2:
        new_shape += [img.shape[2]]
    new_img = np.zeros(new_shape)

    # Range to fill new array
    new_x = max(0, -ul[0]), min(br[0], img.shape[1]) - ul[0]
    new_y = max(0, -ul[1]), min(br[1], img.shape[0]) - ul[1]
    # Range to sample from original image
    old_x = max(0, ul[0]), min(img.shape[1], br[0])
    old_y = max(0, ul[1]), min(img.shape[0], br[1])
    new_img[new_y[0]:new_y[1], new_x[0]:new_x[1]] = img[old_y[0]:old_y[1], old_x[0]:old_x[1]]

    if not rot == 0:
        # Remove padding
        new_img = scipy.misc.imrotate(new_img, rot)
        new_img = new_img[pad:-pad, pad:-pad]

    #new_img = im_to_torch(scipy.misc.imresize(new_img, res))
    new_img = im_to_torch(cv2.resize(new_img, tuple(res)))
    return new_img

I've replaced scipy.misc with cv2. For comparison, this is the crop method from https://github.com/princeton-vl/pytorch_stacked_hourglass/blob/master/utils/img.py:

def crop_newell(img, center, scale, res, rot=0):
    img = im_to_numpy(img)
    # Upper left point
    ul = np.array(transform([0, 0], center, scale, res, invert=1))
    # Bottom right point
    br = np.array(transform(res, center, scale, res, invert=1))

    new_shape = [br[1] - ul[1], br[0] - ul[0]]
    if len(img.shape) > 2:
        print(img.shape)
        new_shape += [img.shape[2]]
    new_img = np.zeros(new_shape)

    # Range to fill new array
    new_x = max(0, -ul[0]), min(br[0], len(img[0])) - ul[0]
    new_y = max(0, -ul[1]), min(br[1], len(img)) - ul[1]
    # Range to sample from original image
    old_x = max(0, ul[0]), min(len(img[0]), br[0])
    old_y = max(0, ul[1]), min(len(img), br[1])
    new_img[new_y[0]:new_y[1], new_x[0]:new_x[1]] = img[old_y[0]:old_y[1], old_x[0]:old_x[1]]

    new_img = im_to_torch(cv2.resize(new_img, tuple(res)))
    return new_img

These are the results I get: (Left: crop_newell, Right: crop)
[screenshots]

As you can see, the crop method sometimes works well and sometimes doesn't; more often than not, it doesn't.
What could be the issue? Am I doing something wrong? @bearpaw

You mentioned you use cv2 instead of scipy. Would you get the same wrong crops by using the original code? Can you verify that the replaced resize function and crop function are doing the same thing as the original ones?
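
For example, something along these lines could be used to check whether the swapped-in resize behaves like the old one (just a sketch; it assumes an older SciPy release where scipy.misc.imresize still exists, and note the two calls take the output size in opposite orders):

import numpy as np
import cv2
import scipy.misc  # needs an older SciPy release that still ships imresize

img = (np.random.rand(480, 640, 3) * 255).astype(np.uint8)

# scipy.misc.imresize takes the size as (height, width);
# cv2.resize takes dsize as (width, height), so a non-square size
# makes any argument-order mistake obvious.
old = scipy.misc.imresize(img, (128, 256))   # -> shape (128, 256, 3)
new = cv2.resize(img, (256, 128))            # -> shape (128, 256, 3)

print(old.shape, new.shape)
print(np.abs(old.astype(int) - new.astype(int)).max())  # small, but not necessarily 0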

Someone already reported this issue on 19 Feb in #112.

@aFewThings Thanks. I've merged the PR.
@pranavbudhwant Can you try to pull the latest update and see if it solves the bug? Thanks

Excuse me, under what environment (Python 2.7 + PyTorch 0.4 + Ubuntu?) did you run this project? @pranavbudhwant

@bearpaw I've tried the latest update, but it still doesn't solve the bug. I haven't been able to verify whether the original code gives the same issue, since scipy.misc no longer supports these methods; I'll probably have to downgrade to an earlier version to check.
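
As a quick check of what the installed SciPy still provides (just a sketch; imresize and imrotate were removed in later SciPy releases):

import scipy

print(scipy.__version__)
try:
    from scipy.misc import imresize, imrotate  # removed in newer SciPy releases
    print('old scipy.misc resize/rotate available')
except ImportError:
    print('imresize/imrotate unavailable; an older SciPy would be needed to compare')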

I have modified the __getitem__ method of the MPII dataset class to turn off augmentations and to switch between the crop methods:

    def __getitem__(self, index, newell=True, aug=True):
        #print('__getitem__')
        sf = self.scale_factor
        rf = self.rot_factor
        if self.is_train:
            a = self.anno[self.train_list[index]]
        else:
            a = self.anno[self.valid_list[index]]

        img_path = os.path.join(self.img_folder, a['img_paths'])
        pts = torch.Tensor(a['joint_self'])
        # pts[:, 0:2] -= 1  # Convert pts to zero based

        # c = torch.Tensor(a['objpos']) - 1
        c = torch.Tensor(a['objpos'])
        s = a['scale_provided']

        # Adjust center/scale slightly to avoid cropping limbs
        if c[0] != -1:
            c[1] = c[1] + 15 * s
            s = s * 1.25

        # For single-person pose estimation with a centered/scaled figure
        nparts = pts.size(0)
        img = load_image(img_path)  # CxHxW

        r = 0
        if self.is_train and aug:
            #s = (s*N(0,1)*scale_factor+1) -> clamp b/w [1-scale_factor,1+scale_factor]
            s = s*torch.randn(1).mul_(sf).add_(1).clamp(1-sf, 1+sf)[0]
            #r = {(N(0,1)*rot_factor) -> clamp b/w [-2*rot_factor,2*rot_factor]} w/ prob 0.6, {0} w/ prob 0.4
            r = torch.randn(1).mul_(rf).clamp(-2*rf, 2*rf)[0] if random.random() <= 0.3 else 0

            # Flip Horizontally w/ prob 0.5
            if random.random() <= 0.5:
                img = torch.from_numpy(fliplr(img.numpy())).float()
                pts = shufflelr(pts, width=img.size(2), dataset='mpii')
                c[0] = img.size(2) - c[0]

            if random.random() < 0: #<=0.5
                # Color Augmentation
                img[0, :, :].mul_(random.uniform(0.8, 1.2)).clamp_(0, 1)
                img[1, :, :].mul_(random.uniform(0.8, 1.2)).clamp_(0, 1)
                img[2, :, :].mul_(random.uniform(0.8, 1.2)).clamp_(0, 1)

        # Prepare image and groundtruth map
        if not newell:
            inp = crop_fixed(img, c, s, [self.inp_res,self.inp_res], rot=r)
        else:
            inp = crop_newell(img, c, s, [self.inp_res,self.inp_res], rot=r)
        if aug:
            inp = color_normalize(inp, self.mean, self.std)

        # Generate ground truth
        tpts = pts.clone()
        target = torch.zeros(nparts, self.out_res, self.out_res) #nJoints,H,W
        target_weight = tpts[:, 2].clone().view(nparts, 1)

        for i in range(nparts):
            # if tpts[i, 2] > 0: # This is evil!!
            if tpts[i, 1] > 0:
                #Transform the point from original image coords to new cropped,rotated image coords
                tpts[i, 0:2] = to_torch(transform(tpts[i, 0:2]+1, c, s, [self.out_res, self.out_res], rot=r))

                #Generate heatmap & visibility of gaussian
                target[i], vis = draw_labelmap(target[i], tpts[i]-1, self.sigma, type=self.label_type)

                #If gaussian for point lies within image bounds, vis=1; else vis=0
                target_weight[i, 0] *= vis

        # Meta info
        meta = {'index' : index, 'center' : c, 'scale' : s,
        'pts' : pts, 'tpts' : tpts, 'target_weight': target_weight}

        return inp, target, meta

And here's the method I use to visualize the differences:

def compare_crops(idx,aug):
    #Newell
    inp, target, meta = train_dataset.__getitem__(idx,newell=True,aug=aug)
    
    center = meta['center'].numpy()

    try:
        center = transform(center,center,float(meta['scale'].numpy()),(256,256))
    except:
        center = transform(center,center,float(meta['scale']),(256,256))

    im = im_to_numpy(inp)
    tar = im_to_numpy(target)
    tar = cv2.resize(tar,(256,256))
    hm = np.max(tar,axis=2)

    plt.subplot(121)
    plt.imshow(hm,cmap='gray')
    plt.imshow(im,alpha=0.5)
    plt.scatter(center[0],center[1],color='red')
    plt.axis('off')

    #Bearpaw
    inp, target, meta = train_dataset.__getitem__(idx,newell=False,aug=aug)
    
    center = meta['center'].numpy()

    try:
        center = transform(center,center,float(meta['scale'].numpy()),(256,256))
    except:
        center = transform(center,center,float(meta['scale']),(256,256))

    im = im_to_numpy(inp)
    tar = im_to_numpy(target)
    tar = cv2.resize(tar,(256,256))
    hm = np.max(tar,axis=2)

    plt.subplot(122)
    plt.imshow(hm,cmap='gray')
    plt.imshow(im,alpha=0.5)
    plt.scatter(center[0],center[1],color='red')
    plt.axis('off')

    plt.show()
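
For example, the comparisons below were produced with calls along these lines (the exact indices varied):

# compare the two crop methods side by side for a handful of training samples
for idx in (0, 1, 2, 3):
    compare_crops(idx, aug=False)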

Here are a few examples from the updated crop method (aug=False) (left: newell, right: updated crop):
[screenshots]

@xiaoxin05 I'm using Python 3.8.3 and PyTorch 1.5.0 with CUDA 10.2 and cuDNN 7.0, on Windows 10.

@xiaoxin05 I'm not sure I follow; are you saying both methods give the same results in your configuration?

There is a small bug in the last merge: the dsize argument of cv2.resize should be in (width, height) order, not (height, width). #116 is a quick fix.
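
For illustration, a minimal example of the dsize convention (plain OpenCV, nothing repo-specific):

import numpy as np
import cv2

img = np.zeros((480, 640, 3), dtype=np.uint8)  # height=480, width=640
out = cv2.resize(img, (256, 128))              # dsize is (width, height)
print(out.shape)                               # (128, 256, 3): 128 rows, 256 columns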

@jiangwei221 Thank you! This partially fixed the issue. Another issue occurs with the imutils.rotate_bound method: it rotates the image clockwise, so it requires angle=-rot.
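
For reference, a minimal sketch of the sign flip (assuming imutils is installed; note that rotate_bound also enlarges the canvas so nothing is clipped):

import numpy as np
import imutils

img = np.zeros((100, 200, 3), dtype=np.uint8)
rot = 30
# rotate_bound treats positive angles as clockwise, so negate the angle to get
# the counter-clockwise behaviour the old scipy.misc.imrotate call assumed.
rotated = imutils.rotate_bound(img, -rot)
print(rotated.shape)  # larger than (100, 200, 3) because the canvas is expanded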

@bearpaw here's a quick fix in #117; the crop method now works as expected.

Actually, after doing some more debugging, I think the transform method works for normal rotation but not for bounded rotation: with bounded rotation the image sometimes shrinks, and the heatmaps no longer overlap the joints, like this:
[screenshots]

So, I've switched back to normal rotation in #117.
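
For reference, the same-size, counter-clockwise rotation that the transform math assumes can be done with plain OpenCV; this is just a rough sketch, not the exact code in #117:

import cv2

def rotate_same_size(img, angle_deg):
    # Rotate counter-clockwise about the image centre while keeping the output
    # the same size as the input (corners get clipped, like scipy.misc.imrotate).
    h, w = img.shape[:2]
    M = cv2.getRotationMatrix2D((w / 2.0, h / 2.0), angle_deg, 1.0)
    return cv2.warpAffine(img, M, (w, h))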