Evaluation Toolkit
Closed this issue · 1 comments
Neptune-Trojans commented
Hi,
I see that the format of the evaluation toolkit is different from that of the training dataset.
Do you have a script that can take a folder structure of images and convert it to the evaluation format?
Thanks
mk-minchul commented
Here is a sample function to create a verification evaluation dataset.
# Project-root setup runs before the remaining imports.
# NOTE(review): presumably setup_root searches upward from this file for a
# directory containing "__root__.txt", puts it on the Python path
# (pythonpath=True) and loads a .env file (dotenv=True) — confirm against the
# pyrootutils documentation.
import pyrootutils
root = pyrootutils.setup_root(
search_from=__file__,
indicator=["__root__.txt"],
pythonpath=True,
dotenv=True,
)
import argparse
import numpy as np
# Re-create the `np.bool` name as an alias of `np.bool_` so that code still
# referring to `np.bool` keeps working.
np.bool = np.bool_ # fix bug for mxnet 1.9.1
import os
from datasets import Dataset
from functools import partial
import re
def natural_sort(l):
    """Return the strings in *l* sorted in natural (human) order.

    Runs of ASCII digits are compared by numeric value and all other text is
    compared case-insensitively, so ``"img2"`` sorts before ``"img10"``.

    Args:
        l: Iterable of strings to sort.

    Returns:
        A new list with the items of *l* in natural order.
    """
    digit_run = re.compile(r'([0-9]+)')

    def alphanum_key(key):
        # Splitting on a single capturing group always yields
        # [text, digits, text, digits, ..., text]: odd positions hold the
        # captured ASCII digit runs and even positions hold the text between
        # them. Choosing the conversion by position (rather than
        # str.isdigit(), which is also true for characters such as "²" or
        # "٣") keeps int() from failing and keeps ints and strings from
        # ending up in the same slot of two different keys.
        return [
            int(part) if position % 2 else part.lower()
            for position, part in enumerate(digit_run.split(key))
        ]

    return sorted(l, key=alphanum_key)
def entry_for_row(index, image, is_same):
    """Pack one row's values into a dict.

    Args:
        index: Value stored under the ``"index"`` key.
        image: Value stored under the ``"image"`` key.
        is_same: Value stored under the ``"is_same"`` key.

    Returns:
        A dict with the keys ``"image"``, ``"index"`` and ``"is_same"``,
        in that order.
    """
    row = dict(image=image, index=index, is_same=is_same)
    return row
def generate_entries(image_list, is_same_list):
    """Lazily yield one row dict per (image, is_same) pair.

    The two iterables are walked in lockstep; each pair is numbered from 0 in
    iteration order and handed to ``entry_for_row`` together with that number.

    Args:
        image_list: Iterable of images.
        is_same_list: Iterable of labels, aligned with *image_list*.

    Yields:
        The dict produced by ``entry_for_row`` for each pair.
    """
    paired = zip(image_list, is_same_list)
    yield from (
        entry_for_row(position, picture, same_flag)
        for position, (picture, same_flag) in enumerate(paired)
    )
if __name__ == '__main__':
    # Sample script: build a verification-evaluation dataset from a list of
    # images plus a list of is-same labels, save it with `datasets`, write a
    # few example images next to it, and show how to load it back.
    parser = argparse.ArgumentParser(description='Export bin to rec')
    parser.add_argument('--save_dir', type=str, default='/ssd2/data/faces/facerec_val',)
    args = parser.parse_args()

    # NOTE: placeholder -- this function is not defined in this file; supply
    # your own loader that returns (images, is-same labels), e.g.:
    # pil_images = [Image.open(f) for f in some_path]
    # issame_list = [True, False] * (len(pil_images) // 2)
    pil_images, issame_list = some_function_to_get_list_of_images_and_issame_list()
    issame_list = list(issame_list)

    # Repeat every label twice in place ([a, b] -> [a, a, b, b]) so there is
    # one label per image.
    # NOTE(review): this presumably means images 2k and 2k+1 form pair k --
    # confirm against the evaluation code that consumes the dataset.
    repeated_issame_list = np.repeat(issame_list, 2).tolist()

    # Sanity-check the interleaving: both the even and the odd positions must
    # reproduce the original labels. (Comparing the first half against the
    # original list would fail spuriously for inputs such as [True, True].)
    assert repeated_issame_list[::2] == issame_list
    assert repeated_issame_list[1::2] == issame_list

    # Input validation must survive `python -O`, so raise instead of assert.
    if len(repeated_issame_list) != len(pil_images):
        raise ValueError(
            f'expected {len(repeated_issame_list)} images '
            f'(2 per is-same label), got {len(pil_images)}'
        )

    generator = partial(generate_entries, pil_images, repeated_issame_list)
    ds = Dataset.from_generator(generator)

    dataset_name = 'some_name'
    print(dataset_name)
    dataset_dir = os.path.join(args.save_dir, dataset_name)
    ds.save_to_disk(dataset_dir, num_shards=1)

    # save some examples
    examples_dir = os.path.join(dataset_dir, 'examples')
    # exist_ok lets the script be re-run against the same output directory.
    os.makedirs(examples_dir, exist_ok=True)
    # Cap at the dataset size so small datasets do not raise IndexError.
    for i in range(min(5, len(ds))):
        ds[i]['image'].save(os.path.join(examples_dir, f'{i}.png'))

    # how to load
    # from datasets import load_dataset
    ds_file = Dataset.load_from_disk(dataset_dir)