IoUMetricsCallback error, iou too small
WangLibo1995 opened this issue · 3 comments
On the same dataset, mmseg reports a validation IoU of 0.8, but IoUMetricsCallback reports a validation IoU of 0.64. I am sure that mmseg's IoU value is correct.
I think the score should not be computed on every single image and then averaged; the correct method is to accumulate a confusion matrix over the whole epoch. The following code does this, and I have tested it.
class Evaluator(object):
    """Accumulate a confusion matrix and derive segmentation metrics from it.

    The confusion matrix is indexed as ``[ground_truth, prediction]``: entry
    ``(i, j)`` counts the pixels whose label is ``i`` and whose predicted
    class is ``j``. Metrics are computed from the accumulated counts, so they
    describe everything added since the last ``reset()`` rather than an
    average of per-image scores.

    Several methods treat the LAST class index as the background class
    (see ``OA_remove_bg``).
    """

    def __init__(self, num_class):
        """Create an evaluator for ``num_class`` classes with empty counts."""
        self.num_class = num_class
        self.confusion_matrix = np.zeros((self.num_class,) * 2)

    @staticmethod
    def _ratio(numerator, denominator):
        """Divide element-wise; 0/0 yields NaN without a RuntimeWarning.

        Callers aggregate with ``np.nanmean``, so NaN for classes that never
        occur is the intended result and the warning is only noise.
        """
        with np.errstate(divide='ignore', invalid='ignore'):
            return numerator / denominator

    def get_tp_fp_tn_fn(self):
        """Return per-class ``(tp, fp, tn, fn)`` arrays of length ``num_class``."""
        matrix = self.confusion_matrix
        tp = np.diag(matrix)
        # Columns are predictions: predicted as class c but labelled otherwise.
        fp = matrix.sum(axis=0) - tp
        # Rows are ground truth: labelled as class c but predicted otherwise.
        fn = matrix.sum(axis=1) - tp
        # Everything that is neither labelled nor predicted as class c.
        tn = matrix.sum() - tp - fp - fn
        return tp, fp, tn, fn

    def F1(self):
        """Return the per-class F1 score (NaN where it is undefined)."""
        tp, fp, _, fn = self.get_tp_fp_tn_fn()
        precision = self._ratio(tp, tp + fp)
        recall = self._ratio(tp, tp + fn)
        return self._ratio(2.0 * precision * recall, precision + recall)

    def OA(self):
        """Return overall accuracy: correctly classified pixels / all pixels."""
        return self._ratio(np.diag(self.confusion_matrix).sum(),
                           self.confusion_matrix.sum())

    def OA_remove_bg(self):
        """Return overall accuracy with the last (background) class excluded.

        Only pixels whose label AND prediction are both non-background are
        counted in the denominator.
        """
        diagonal = np.diag(self.confusion_matrix)
        foreground_correct = diagonal.sum() - diagonal[-1]
        return self._ratio(foreground_correct,
                           self.confusion_matrix[:-1, :-1].sum())

    def Intersection_over_Union(self):
        """Return the per-class IoU, ``tp / (tp + fp + fn)``."""
        tp, fp, _, fn = self.get_tp_fp_tn_fn()
        return self._ratio(tp, tp + fn + fp)

    def Dice(self):
        """Return the per-class Dice coefficient, ``2*tp / (2*tp + fp + fn)``."""
        tp, fp, _, fn = self.get_tp_fp_tn_fn()
        return self._ratio(2 * tp, (tp + fp) + (tp + fn))

    def Pixel_Accuracy_Class(self):
        """Return per-class pixel accuracy, ``tp / (tp + fn)``.

        The denominator is the number of ground-truth pixels of each class
        (row sums of the confusion matrix).
        """
        return self._ratio(np.diag(self.confusion_matrix),
                           self.confusion_matrix.sum(axis=1))

    def Frequency_Weighted_Intersection_over_Union(self):
        """Return the IoU weighted by each class's ground-truth frequency."""
        freq = self._ratio(np.sum(self.confusion_matrix, axis=1),
                           np.sum(self.confusion_matrix))
        iou = self.Intersection_over_Union()
        # Classes absent from the ground truth carry zero weight; skipping
        # them also keeps their NaN IoU out of the sum.
        present = freq > 0
        return (freq[present] * iou[present]).sum()

    def _generate_matrix(self, gt_image, pre_image):
        """Build the confusion matrix of one ``(gt_image, pre_image)`` pair.

        Pixels whose label lies outside ``[0, num_class)`` (e.g. an ignore
        index) are dropped. Predictions are NOT range-checked: they must
        already lie in ``[0, num_class)``.
        """
        mask = (gt_image >= 0) & (gt_image < self.num_class)
        # Encode each (gt, pred) pair as one flat index so a single bincount
        # yields every cell of the matrix. bincount requires integer input.
        label = (self.num_class * gt_image[mask].astype('int')
                 + pre_image[mask].astype('int'))
        count = np.bincount(label, minlength=self.num_class ** 2)
        return count.reshape(self.num_class, self.num_class)

    def add_batch(self, gt_image, pre_image):
        """Add the counts of one label/prediction pair to the running matrix."""
        assert gt_image.shape == pre_image.shape
        self.confusion_matrix += self._generate_matrix(gt_image, pre_image)

    def reset(self):
        """Clear all accumulated counts."""
        self.confusion_matrix = np.zeros((self.num_class,) * 2)
class EvaluateCallBack(Callback):
    """Metric callback reporting segmentation scores from accumulated counts.

    A single ``Evaluator`` confusion matrix is reset at the start of every
    loader and updated after every batch, so the reported values describe all
    batches seen so far in the current loader instead of an average of
    per-image scores.

    Args:
        class_names: optional sequence of class names, one per class; when
            given, per-class IoU and F1 are reported at the end of the loader.
        num_classes: number of classes. Falls back to ``len(class_names)``
            when omitted.
        prefix: six metric names, in the order IoU, frequency-weighted IoU,
            overall accuracy, mean per-class accuracy, F1, Dice.
        input_key: key of the target tensor in ``runner.input``.
        output_key: key of the prediction tensor in ``runner.output``.
        remove_background: when true, the LAST class is treated as background
            and left out of the IoU, accuracy, F1 and Dice averages. The
            frequency-weighted IoU always covers every class.
    """

    def __init__(
        self,
        class_names=None,  # tuple.('water','tree','person',...)
        num_classes: int = None,  # assert len(class_names)==num_classes
        prefix=('iou', 'fwiou', 'OA', 'MPA', 'F1', 'dice'),
        input_key: str = "targets",
        output_key: str = "logits",
        remove_background=True
    ):
        super().__init__(CallbackOrder.Metric)
        if num_classes is None and class_names is not None:
            # The two must agree anyway, so the names can supply the count.
            num_classes = len(class_names)
        self.output_key = output_key
        self.input_key = input_key
        self.class_names = class_names
        self.num_classes = num_classes
        self.prefix = prefix
        self.evaluator = Evaluator(num_class=self.num_classes)
        self.remove_background = remove_background

    def _summary_values(self):
        """Return the six summary metrics, in the same order as ``prefix``."""
        evaluator = self.evaluator
        iou = evaluator.Intersection_over_Union()
        accuracy = evaluator.Pixel_Accuracy_Class()
        f1 = evaluator.F1()
        dice = evaluator.Dice()
        if self.remove_background:
            # Drop the last (background) class from the per-class averages.
            iou, accuracy, f1, dice = iou[:-1], accuracy[:-1], f1[:-1], dice[:-1]
            overall = evaluator.OA_remove_bg()
        else:
            overall = evaluator.OA()
        return (
            np.nanmean(iou),
            np.nanmean(evaluator.Frequency_Weighted_Intersection_over_Union()),
            overall,
            np.nanmean(accuracy),
            np.nanmean(f1),
            np.nanmean(dice),
        )

    def _write_summary(self, runner):
        """Store the six summary metrics in ``runner.batch_metrics``."""
        for index, value in enumerate(self._summary_values()):
            runner.batch_metrics[self.prefix[index]] = value

    def on_loader_start(self, state):
        """Start every loader from an empty confusion matrix."""
        self.evaluator.reset()

    @torch.no_grad()
    def on_batch_end(self, runner: IRunner):
        """Add the batch to the confusion matrix and report running metrics."""
        outputs = runner.output[self.output_key].detach()
        targets = runner.input[self.input_key].detach()
        # Dimension 1 of the output holds the per-class scores; one argmax
        # over the whole batch yields the same counts as a per-image loop.
        y_pred = outputs.argmax(dim=1).cpu().numpy()
        y_true = targets.cpu().numpy()
        self.evaluator.add_batch(pre_image=y_pred, gt_image=y_true)
        self._write_summary(runner)

    def on_loader_end(self, runner: IRunner):
        """Report final summary metrics and, if named, per-class IoU and F1."""
        # NOTE(review): the summary values go to ``batch_metrics`` while the
        # per-class values go to ``loader_metrics`` -- confirm this is the
        # intended destination at loader end.
        self._write_summary(runner)
        iou_per_class = self.evaluator.Intersection_over_Union()
        f1_per_class = self.evaluator.F1()
        if self.class_names is not None:
            assert len(self.class_names) == self.num_classes
            for class_name, class_iou, class_f1 in zip(self.class_names, iou_per_class, f1_per_class):
                runner.loader_metrics['iou_' + class_name] = float(class_iou)
                runner.loader_metrics['F1_' + class_name] = float(class_f1)
Do you have any references to papers which state that multi-class IoU computed per-epoch, not per-image?
Do you have any references to papers which state that multi-class IoU computed per-epoch, not per-image?
In my experiments, the per-epoch IoU is closer to the IoU reported in papers. This is not hard to understand. For example, suppose image A contains only a cat and image B contains only a dog, and both are predicted 100% correctly. With per-image IoU, image A gives iou-cat=1, iou-dog=0 and image B gives iou-cat=0, iou-dog=1, so the final iou-cat=0.5 and the final iou-dog=0.5. With per-epoch IoU, however, the final iou-cat and iou-dog are both 1. Since the model predicts both dog and cat perfectly, the IoU should be 1.
Finally, thank you for your excellent work, it helps me a lot in my competition