Document error reporting. Is there any missing document?
Closed this issue · 5 comments
Data_io.py file reported an error. Is there any missing file?
IndexError: list index out of range
There is a special index -1
for the mean frame when the video doesn't have enough frames. This probably causes the error.
Can you upload the complete project? I ask because I really want to do this research. Thank you very much.
Everything you need to build the code is available :). You can extract your own feature or use my pre-trained resnet50 features. The code should work fine.
I think the "data_io.py" file was written earlier before the construction of the main dataset files. (The line in_fid_folder = '/home/anguyen/workspace/dataset/Breakfast/v2c_dataset/subtitle/l30_srt' where I suppose those are just main commands in the "train.txt" and "test.txt" from IIT-V2C dataset)
I've worked on an alternative version of "data_io.py" which successfully uses the ResNet50 features and constructs the pkl files you need for training and testing ("train_resnet50_keras_feature_no_sub_mean.pkl" and "test_resnet50_keras_feature_no_sub_mean.pkl").
# read train/test.txt --> create .pkl file for train and test
import os
import random
import pandas as pd
import numpy as np
# Fix the RNG seed for reproducibility. NOTE(review): no call to random.*
# is visible below, so this may be a leftover from an earlier version.
random.seed(0)
#in_feature_folder = '/home/anguyen/workspace/dataset/Breakfast/v2c_dataset/video/vgg19_caffe_feature'
#in_feature_folder = '/home/anguyen/workspace/dataset/Breakfast/v2c_dataset/video/resnet50_keras_feature'
# Name of the CNN feature set to use; alternative options are kept commented
# out below for quick switching.
feature_name = 'resnet50_keras_feature_no_sub_mean'
#feature_name = 'vgg16_keras_feature_no_sub_mean'
#feature_name = 'inception_keras_feature_no_sub_mean'
#feature_name = 'inception_tensorflow_from_saliency'
#in_feature_folder = '/home/anguyen/workspace/dataset/Breakfast/v2c_dataset/video/resnet50_keras_feature_no_sub_mean'
# Folder (relative to the current working directory) that holds one .npy
# feature file per annotated video clip.
in_feature_folder = os.path.abspath(os.path.join(os.path.curdir, feature_name))
# Paths to the raw IIT-V2C annotation files (4 lines per record).
# NOTE(review): these two variables are not referenced elsewhere in this
# script -- the __main__ block passes the paths to load_annotations directly.
in_train_txt_file = os.path.abspath(os.path.join(os.path.curdir, 'IIT-V2C', 'train.txt'))
in_test_text_file = os.path.abspath(os.path.join(os.path.curdir, 'IIT-V2C', 'test.txt'))
def load_data(in_file):
    """Load and return a previously pickled object (a DataFrame) from *in_file*."""
    return pd.read_pickle(in_file)
def load_annotations(dataset_path=os.path.join('IIT-V2C'),
                     annotation_file='train.txt'):
    """Parse an IIT-V2C annotation file.

    The file stores one record per four consecutive lines; line 1 holds the
    clip name and line 3 the command sentence.

    Returns:
        dict mapping '<clip_name>.npy' -> command string.
    """
    annotations = {}
    record = []
    with open(os.path.join(dataset_path, annotation_file), 'r') as fh:
        for raw_line in fh:
            record.append(raw_line.strip())
            if len(record) == 4:
                # Record complete: key on the feature file name, keep the command.
                video_fname = record[0] + '.npy'
                annotations[video_fname] = ' '.join(record[2].strip().split(' '))
                record = []
    return annotations
def save_data(list_data, out_file, annotations, feature_folder=None):
    """Pickle a DataFrame pairing each clip's feature path with its caption.

    Args:
        list_data: iterable of feature file names (e.g. 'P45_..._22.npy');
            every name must be a key of *annotations* (a missing name raises
            KeyError, matching the original behavior).
        out_file: destination path for the pickled DataFrame.
        annotations: dict mapping feature file name -> command sentence.
        feature_folder: folder prepended to each file name to form the full
            feature path. Defaults to the module-level ``in_feature_folder``
            for backward compatibility.
    """
    if feature_folder is None:
        feature_folder = in_feature_folder
    # os.path.join instead of manual '/' concatenation so paths are built
    # correctly on every platform; comprehensions replace the append loop.
    list_video_id = [os.path.join(feature_folder, name) for name in list_data]
    list_caption = [annotations[name] for name in list_data]
    df = pd.DataFrame({'video_path': list_video_id, 'caption': list_caption})
    print(df)
    # save to pickle
    df.to_pickle(out_file)
if __name__ == '__main__':
    # Resulting .pkl layout: one row per clip ->
    #   video_id_k  caption_k
    dataset_dir = os.path.join('IIT-V2C')
    train_ann = load_annotations(dataset_path=dataset_dir,
                                 annotation_file='train.txt')
    test_ann = load_annotations(dataset_path=dataset_dir,
                                annotation_file='test.txt')
    train_clips = list(train_ann.keys())
    test_clips = list(test_ann.keys())
    # Sanity-check output, e.g. P45_webcam01_P45_juice_22.npy
    print('LIST TRAIN[0]: ', train_clips[0])
    print('LIST TEST[0]: ', test_clips[0])
    out_folder = os.path.join(os.path.curdir, 'train_test_split')
    if not os.path.exists(out_folder):
        os.makedirs(out_folder)
    train_file = out_folder + '/' + 'train_' + feature_name + '.pkl'
    test_file = out_folder + '/' + 'test_' + feature_name + '.pkl'
    save_data(train_clips, train_file, train_ann)
    save_data(test_clips, test_file, test_ann)
    print('all done!')
According to the files provided by the author and zonetrooper32, it's ready to run. Thank you very much. I'll close this issue.