Thank you for your contribution.
I found some confusing code here:
|
def load_video(self, video_path, num_segments=8, return_msg=False):
    """Decode selected frames of a video into images.

    The video is opened with ``VideoReader`` on ``cpu(0)``. The frames to
    decode are chosen by ``self.get_index(num_frames, num_segments)``,
    which is defined outside this snippet.

    Args:
        video_path: Path handed to ``VideoReader``.
        num_segments: Forwarded unchanged to ``self.get_index``.
        return_msg: When true, also return a sentence listing the
            timestamp of every sampled frame.

    Returns:
        A list with one ``Image.fromarray`` result per frame index, or a
        ``(images, msg)`` tuple when ``return_msg`` is true.
    """
    vr = VideoReader(video_path, ctx=cpu(0))
    num_frames = len(vr)
    frame_indices = self.get_index(num_frames, num_segments)

    # One evenly spaced index per whole second of video
    # (frame count floor-divided by the average fps).
    # NOTE(review): `buffer` and the first `images_group` built from it are
    # overwritten below without being read anywhere in this function. They
    # are kept only so the batch decode still runs exactly as before;
    # candidate for removal once confirmed unused.
    duration = num_frames // vr.get_avg_fps()
    index = np.linspace(0, num_frames - 1, num=int(duration))
    buffer = vr.get_batch(index).asnumpy()
    images_group = [Image.fromarray(frame) for frame in buffer]

    # The images that are actually returned: one per index from get_index.
    images_group = [
        Image.fromarray(vr[frame_index].asnumpy())
        for frame_index in frame_indices
    ]

    if not return_msg:
        return images_group

    # Timestamp of each sampled frame: index / average fps, to 0.1 s.
    fps = float(vr.get_avg_fps())
    sec = ", ".join(str(round(f / fps, 1)) for f in frame_indices)
    # Original author's note: " " should be added in the start and end
    # (presumably of the message below - TODO confirm).
    msg = f"The video contains {len(frame_indices)} frames sampled at {sec} seconds."
    return images_group, msg
Why do we need `buffer`? And why is `images_group` initialized a second time?
Secondly, what is the purpose of `num_segments` in the `get_index` function? `num_segments` is being set to `num_frames` in `upload_video`:
|
num_segments = self.model.config.num_frames if num_segments is None else num_segments |
Moreover, it seems that the `offsets` returned by `get_index` are used directly as `frame_indices`:
|
frame_indices = self.get_index(num_frames, num_segments) |
However, this is not the case in the other evaluation scenarios, e.g.:
|
def get_index(self, bound, fps, max_frame, first_idx=0):
    """Pick one frame index from the middle of each of ``self.num_segments`` segments.

    Args:
        bound: Optional ``(start, end)`` pair. Each value is multiplied by
            ``fps`` to obtain a frame index (so presumably seconds - TODO
            confirm with callers). A falsy value selects the whole video.
        fps: Frames per second used to convert ``bound`` to frame indices.
        max_frame: Largest frame index the end of the range may take.
        first_idx: Smallest frame index the start of the range may take.

    Returns:
        ``np.ndarray`` of ``self.num_segments`` integer frame indices.
    """
    # With no bound, use a window so wide that the clamps below decide.
    start, end = (bound[0], bound[1]) if bound else (-100000, 100000)

    start_idx = max(first_idx, round(start * fps))
    end_idx = min(round(end * fps), max_frame)

    segment_count = self.num_segments
    seg_size = float(end_idx - start_idx) / segment_count
    half_segment = seg_size / 2

    picks = []
    for k in range(segment_count):
        # Rounded start of segment k, shifted half a segment forward so the
        # sample lands near the segment's centre; int() truncates.
        picks.append(int(start_idx + half_segment + np.round(seg_size * k)))
    return np.array(picks)
|
|
|
def read_video(self, video_path, bound=None):
    """Decode the frames chosen by ``self.get_index`` into a list of images.

    Args:
        video_path: Path handed to ``VideoReader`` (opened on ``cpu(0)``
            with ``num_threads=4``).
        bound: Optional ``(start, end)`` pair forwarded to
            ``self.get_index``; ``None`` means no restriction.

    Returns:
        A list with one ``Image.fromarray`` result per selected frame, in
        the order ``get_index`` returned the indices.
    """
    reader = VideoReader(video_path, ctx=cpu(0), num_threads=4)
    last_frame = len(reader) - 1  # highest valid frame index
    avg_fps = float(reader.get_avg_fps())

    selected = self.get_index(bound, avg_fps, last_frame, first_idx=0)
    return [Image.fromarray(reader[i].asnumpy()) for i in selected]