BloodAxe/pytorch-toolbelt

IoUMetricsCallback error, iou too small

WangLibo1995 opened this issue · 3 comments

On the same dataset, mmseg gives me a validation IoU of 0.8, but IoUMetricsCallback gives 0.64. I am sure that mmseg's IoU value is the correct one.

I think computing the score on every single image and then averaging is the wrong approach; the correct method is to accumulate a confusion matrix over the whole epoch and derive the metrics from it. The following is my code, which I have tested.

import numpy as np
import torch
from catalyst.core import Callback, CallbackOrder, IRunner


class Evaluator(object):
    """Accumulates a confusion matrix over an epoch and derives all metrics from it."""

    def __init__(self, num_class):
        self.num_class = num_class
        self.confusion_matrix = np.zeros((self.num_class,) * 2)

    def get_tp_fp_tn_fn(self):
        # Rows of the confusion matrix are ground truth, columns are predictions
        # (see _generate_matrix), so column sums give TP + FP and row sums give TP + FN.
        tp = np.diag(self.confusion_matrix)
        fp = self.confusion_matrix.sum(axis=0) - tp
        fn = self.confusion_matrix.sum(axis=1) - tp
        tn = self.confusion_matrix.sum() - (tp + fp + fn)
        return tp, fp, tn, fn

    def F1(self):
        tp, fp, tn, fn = self.get_tp_fp_tn_fn()
        precision = tp / (tp + fp)
        recall = tp / (tp + fn)
        return (2.0 * precision * recall) / (precision + recall)

    def OA(self):
        # Overall accuracy: correctly classified pixels over all pixels.
        return np.diag(self.confusion_matrix).sum() / self.confusion_matrix.sum()

    def OA_remove_bg(self):
        # Assumes the background is the last class index.
        bg = np.diag(self.confusion_matrix)[-1]
        return (np.diag(self.confusion_matrix).sum() - bg) / self.confusion_matrix[:-1, :-1].sum()

    def Intersection_over_Union(self):
        tp, fp, tn, fn = self.get_tp_fp_tn_fn()
        return tp / (tp + fn + fp)

    def Dice(self):
        tp, fp, tn, fn = self.get_tp_fp_tn_fn()
        return 2 * tp / ((tp + fp) + (tp + fn))

    def Pixel_Accuracy_Class(self):
        # Per-class recall: TP / (TP + FN).
        return np.diag(self.confusion_matrix) / self.confusion_matrix.sum(axis=1)

    def Frequency_Weighted_Intersection_over_Union(self):
        freq = np.sum(self.confusion_matrix, axis=1) / np.sum(self.confusion_matrix)
        iou = self.Intersection_over_Union()
        return (freq[freq > 0] * iou[freq > 0]).sum()

    def _generate_matrix(self, gt_image, pre_image):
        # Ignore pixels whose label is outside [0, num_class), then build the
        # confusion matrix with a single bincount.
        mask = (gt_image >= 0) & (gt_image < self.num_class)
        label = self.num_class * gt_image[mask].astype('int') + pre_image[mask]
        count = np.bincount(label, minlength=self.num_class ** 2)
        return count.reshape(self.num_class, self.num_class)

    def add_batch(self, gt_image, pre_image):
        assert gt_image.shape == pre_image.shape
        self.confusion_matrix += self._generate_matrix(gt_image, pre_image)

    def reset(self):
        self.confusion_matrix = np.zeros((self.num_class,) * 2)
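
As a quick sanity check of the Evaluator on its own, here is a tiny made-up example (arbitrary 2x2 arrays, just to show the accumulation API):

import numpy as np

evaluator = Evaluator(num_class=2)

gt = np.array([[0, 0], [1, 1]])      # ground truth
pred = np.array([[0, 1], [1, 1]])    # prediction: 3 of 4 pixels correct
evaluator.add_batch(gt_image=gt, pre_image=pred)

print(evaluator.Intersection_over_Union())  # per-class IoU approx [0.5, 0.667]
print(evaluator.OA())                       # overall accuracy: 0.75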

class EvaluateCallBack(Callback):
    def __init__(
        self,
        class_names=None,            # e.g. ('water', 'tree', 'person', ...)
        num_classes: int = None,     # must equal len(class_names)
        prefix=('iou', 'fwiou', 'OA', 'MPA', 'F1', 'dice'),
        input_key: str = "targets",
        output_key: str = "logits",
        remove_background=True,
    ):
        super().__init__(CallbackOrder.Metric)

        self.output_key = output_key
        self.input_key = input_key
        self.class_names = class_names
        self.num_classes = num_classes
        self.prefix = prefix
        self.evaluator = Evaluator(num_class=self.num_classes)
        self.remove_background = remove_background

    def _write_metrics(self, metrics):
        # All values come from the confusion matrix accumulated so far, never from
        # a single image. With remove_background=True the last class (assumed to
        # be background) is dropped from the per-class averages.
        if self.remove_background:
            metrics[self.prefix[0]] = np.nanmean(self.evaluator.Intersection_over_Union()[:-1])
            metrics[self.prefix[1]] = self.evaluator.Frequency_Weighted_Intersection_over_Union()
            metrics[self.prefix[2]] = self.evaluator.OA_remove_bg()
            metrics[self.prefix[3]] = np.nanmean(self.evaluator.Pixel_Accuracy_Class()[:-1])
            metrics[self.prefix[4]] = np.nanmean(self.evaluator.F1()[:-1])
            metrics[self.prefix[5]] = np.nanmean(self.evaluator.Dice()[:-1])
        else:
            metrics[self.prefix[0]] = np.nanmean(self.evaluator.Intersection_over_Union())
            metrics[self.prefix[1]] = self.evaluator.Frequency_Weighted_Intersection_over_Union()
            metrics[self.prefix[2]] = self.evaluator.OA()
            metrics[self.prefix[3]] = np.nanmean(self.evaluator.Pixel_Accuracy_Class())
            metrics[self.prefix[4]] = np.nanmean(self.evaluator.F1())
            metrics[self.prefix[5]] = np.nanmean(self.evaluator.Dice())

    def on_loader_start(self, runner: IRunner):
        self.evaluator.reset()

    @torch.no_grad()
    def on_batch_end(self, runner: IRunner):
        outputs = runner.output[self.output_key].detach()
        targets = runner.input[self.input_key].detach()

        batch_size = targets.size(0)
        for image_index in range(batch_size):
            y_pred = outputs[image_index].argmax(dim=0).cpu().numpy()
            y_true = targets[image_index].cpu().numpy()
            self.evaluator.add_batch(pre_image=y_pred, gt_image=y_true)

        self._write_metrics(runner.batch_metrics)

    def on_loader_end(self, runner: IRunner):
        # Write the final accumulated values into loader_metrics so they replace
        # the per-batch averages, then log per-class IoU and F1.
        self._write_metrics(runner.loader_metrics)

        iou_per_class = self.evaluator.Intersection_over_Union()
        f1_per_class = self.evaluator.F1()
        if self.class_names is not None:
            assert len(self.class_names) == self.num_classes
            for class_name, class_iou, class_f1 in zip(self.class_names, iou_per_class, f1_per_class):
                runner.loader_metrics['iou_' + class_name] = float(class_iou)
                runner.loader_metrics['F1_' + class_name] = float(class_f1)
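
For completeness, this is roughly how I plug the callback into a training run. It is only a sketch with a dummy model and random data, and it assumes a Catalyst 20.x-style SupervisedRunner (the import path and arguments may differ in your version):

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from catalyst.dl import SupervisedRunner

# Dummy 4-class segmentation setup, just to show where the callback plugs in.
model = nn.Conv2d(3, 4, kernel_size=1)   # stand-in for a real segmentation network
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

images = torch.randn(8, 3, 32, 32)
masks = torch.randint(0, 4, (8, 32, 32))
loader = DataLoader(TensorDataset(images, masks), batch_size=4)

runner = SupervisedRunner(input_key="features", output_key="logits")
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    loaders={"train": loader, "valid": loader},
    callbacks=[
        EvaluateCallBack(
            class_names=('water', 'tree', 'person', 'background'),
            num_classes=4,
            remove_background=True,
        ),
    ],
    num_epochs=1,
)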

Do you have any references to papers which state that multi-class IoU should be computed per-epoch, not per-image?

In my experiment, the per-epoch IoU is much closer to the IoU reported in papers. It's not hard to understand why. For example, suppose image A contains only a cat and image B contains only a dog, and both are predicted 100% correctly. With per-image IoU, image A gives iou-cat = 1 and iou-dog = 0 (treating the IoU of an absent class as 0), while image B gives iou-cat = 0 and iou-dog = 1, so the final iou-cat and iou-dog are both 0.5. With per-epoch IoU, the final iou-cat and iou-dog are both 1. Since the model predicts cat and dog perfectly, the IoU should be 1.
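
To make the cat/dog example concrete with the Evaluator above (class 0 = cat, class 1 = dog, two fake single-class images, both predicted perfectly):

import numpy as np

evaluator = Evaluator(num_class=2)

image_a = np.zeros((4, 4), dtype=int)   # image A: only cat pixels
image_b = np.ones((4, 4), dtype=int)    # image B: only dog pixels

# Per-epoch accumulation: both images go into one confusion matrix.
evaluator.add_batch(gt_image=image_a, pre_image=image_a)
evaluator.add_batch(gt_image=image_b, pre_image=image_b)
print(evaluator.Intersection_over_Union())  # [1. 1.] -- both classes get IoU 1

# Per-image scoring instead: on image A the dog class has tp = fp = fn = 0,
# so its IoU is 0/0; counting it as 0 and averaging over images gives
# iou-dog = (0 + 1) / 2 = 0.5, even though every dog pixel was correct.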

Finally, thank you for your excellent work; it has helped me a lot in my competition.