Help with the evaluation
Wangdanchunbufuz opened this issue · 4 comments
Wangdanchunbufuz commented
davidnvq commented
Thanks for asking!
Could you check the evaluation code/notebook in the M2 Transformer repo? I remember taking the code from there.
Wangdanchunbufuz commented
davidnvq commented
Sorry for my bad memory. Luckily, I've just checked the old code and found that the evaluation is based on this repo, not M2 Transformer: https://github.com/salaniz/pycocoevalcap.
To get the 'Object', 'Attribute', 'Relation', 'Color', 'Count', and 'Size' scores, you need to modify https://github.com/salaniz/pycocoevalcap/blob/master/spice/spice.py as follows:
import os
import subprocess
import json
import numpy as np
import tempfile
import math

# Assumes spice.jar is in the same directory as spice.py. Change as needed.
SPICE_JAR = 'spice-1.0.jar'
TEMP_DIR = 'tmp'
CACHE_DIR = 'cache'


class Spice:
    """
    Main Class to compute the SPICE metric
    """

    def float_convert(self, obj):
        try:
            return float(obj)
        except (ValueError, TypeError):
            return np.nan

    def compute_score(self, gts, res):
        assert sorted(gts.keys()) == sorted(res.keys())
        imgIds = sorted(gts.keys())

        # Prepare temp input file for the SPICE scorer
        input_data = []
        for id in imgIds:
            hypo = res[id]
            ref = gts[id]

            # Sanity check.
            assert type(hypo) is list
            assert len(hypo) == 1
            assert type(ref) is list
            assert len(ref) >= 1

            input_data.append({"image_id": id, "test": hypo[0], "refs": ref})

        cwd = os.path.dirname(os.path.abspath(__file__))
        temp_dir = os.path.join(cwd, TEMP_DIR)
        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir)
        in_file = tempfile.NamedTemporaryFile(mode='w+', delete=False, dir=temp_dir)
        json.dump(input_data, in_file, indent=2)
        in_file.close()

        # Start job
        out_file = tempfile.NamedTemporaryFile(mode='w+', delete=False, dir=temp_dir)
        out_file.close()
        cache_dir = os.path.join(cwd, CACHE_DIR)
        if not os.path.exists(cache_dir):
            os.makedirs(cache_dir)
        # The '-subset' flag makes the SPICE jar emit per-category (subset) scores
        # in addition to the overall 'All' score.
        spice_cmd = [
            'java', '-jar', '-Xmx8G', SPICE_JAR, in_file.name,
            '-cache', cache_dir, '-out', out_file.name, '-subset', '-silent'
        ]
        subprocess.check_call(spice_cmd, cwd=os.path.dirname(os.path.abspath(__file__)))

        # Read and process results
        with open(out_file.name) as data_file:
            results = json.load(data_file)
        os.remove(in_file.name)
        os.remove(out_file.name)

        imgId_to_scores = {}
        spice_scores = []
        # 'Cardinality' is what the paper tables report as 'Count'.
        keys = ['Relation', 'Cardinality', 'Color', 'Attribute', 'Object', 'Size']
        other_scores = {key: [] for key in keys}
        for item in results:
            imgId_to_scores[item['image_id']] = item['scores']
            spice_scores.append(self.float_convert(item['scores']['All']['f']))
            for key in keys:
                value = self.float_convert(item['scores'][key]['f'])
                # Skip NaN: an image with no tuples in a category has no F-score there.
                if not math.isnan(value):
                    other_scores[key].append(value)

        # Per-category means over the images that have a score in that category.
        for key in keys:
            score = np.mean(np.array(other_scores[key]))
            print(f"SPICE key: {key} = {score}")

        average_score = np.mean(np.array(spice_scores))
        scores = []
        for image_id in imgIds:
            # Convert None to NaN before saving scores over subcategories
            score_set = {}
            for category, score_tuple in imgId_to_scores[image_id].items():
                score_set[category] = {k: self.float_convert(v) for k, v in score_tuple.items()}
            scores.append(score_set)
        print(f"SPICE Score: avg = {average_score}")
        return average_score, scores

    def method(self):
        return "SPICE"
davidnvq commented
I can't find my old code for the CLIP score. However, you can easily follow this repo to compute it:
https://github.com/jmhessel/clipscore
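In case that link is inconvenient, here is a rough sketch of reference-free CLIPScore computed directly with the openai/CLIP package rather than the jmhessel/clipscore script; the 2.5 * max(cos, 0) form follows Hessel et al. (2021), and the image path and caption are placeholders:

# Sketch of CLIPScore(c, v) = 2.5 * max(cos(text_emb, image_emb), 0).
# Note: the reference implementation also prepends "A photo depicts " to the candidate.
import torch
import clip  # pip install git+https://github.com/openai/CLIP.git
from PIL import Image

device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)

def clip_score(image_path, caption):
    image = preprocess(Image.open(image_path)).unsqueeze(0).to(device)
    text = clip.tokenize(["A photo depicts " + caption]).to(device)
    with torch.no_grad():
        img_emb = model.encode_image(image)
        txt_emb = model.encode_text(text)
    # Cosine similarity of L2-normalized embeddings.
    img_emb = img_emb / img_emb.norm(dim=-1, keepdim=True)
    txt_emb = txt_emb / txt_emb.norm(dim=-1, keepdim=True)
    cos = (img_emb * txt_emb).sum().item()
    return 2.5 * max(cos, 0.0)

# e.g. clip_score("val2014_000000184321.jpg", "a train is moving along the tracks")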