首页 > 解决方案 > 对大型视频使用 TimeseriesGenerator 提取帧

问题描述

我想训练一个多对多 LSTM 模型来预测舞蹈动作。我正在使用一个相对较大的视频,我的电脑无法处理提取视频中的所有帧。我用moviepy创建了一个自定义类,通过使用给定的帧号来提取帧。

from moviepy.video.io.VideoFileClip import VideoFileClip
from matplotlib import pyplot as plt
from pathlib import Path
from math import ceil
import numpy as np
import time

class Video:
    """Frame-by-number access to a video file opened through VideoFileClip.

    Frames are fetched one at a time with the clip's ``get_frame``; nothing
    in this class stores previously fetched frames.
    """

    def __init__(self,path,**kwargs):
        """Open the clip at *path*; extra keyword arguments go to VideoFileClip."""
        # path is stored as given; __repr__ reads ``path.name`` from it, so it
        # needs to be a pathlib.Path-like object rather than a plain string.
        self.path       = path
        self.video      = VideoFileClip(str(path),**kwargs)
    
    def __repr__(self):
        """Return ``<HH:MM:SS - file name>`` built from the clip duration."""
        duration = time.strftime('%H:%M:%S',time.gmtime(self.video.duration))
        return f"<{duration} - {self.path.name}>"

    def __len__(self):
        """Return the frame count: duration times fps, rounded up."""
        return ceil(self.video.duration*self.video.fps)

    def __getitem__(self,frame_num):
        """Return the frame for *frame_num*.

        The frame number is divided by the clip's fps to obtain the time
        passed to ``get_frame``, so *frame_num* must be a number. A slice is
        not supported: the division raises TypeError for it.
        """
        frame   = self.video.get_frame(frame_num/self.video.fps)
        return frame
    
    def __iter__(self):
        """Yield every frame in order, from frame 0 up to ``len(self) - 1``."""
        for frame_num in range(self.__len__()):
            yield self.__getitem__(frame_num)

这个自定义类设法提取具有给定帧号的单个帧。

# Location of the source video, given as a relative path.
PATH  = Path("data/HenryStickmin.mp4")
# audio=False is forwarded through Video's **kwargs to VideoFileClip.
HENRY = Video(PATH, audio=False)
<00:59:54 - HenryStickmin.mp4>

# Draw four random frame numbers in [0, len(HENRY)) and show them in a 2x2 grid.
frame_nums = np.random.randint(0, len(HENRY), 4)
plt.figure(figsize=(21,13))
# Subplot positions are 1-based, so enumerate from 1; the grid shape is spelled
# out instead of relying on the three-digit shorthand plus an offset.
for position, frame_num in enumerate(frame_nums, start=1):
    plt.subplot(2, 2, position)
    plt.imshow(HENRY[frame_num])
    plt.axis('off')
    plt.title(f'Frame No: {frame_num}', fontweight='bold')
plt.show()

在此处输入图像描述

我的下一个目标是创建时间序列数据集,但出现此错误

import tensorflow as tf
# Hard-coded frame rate; it is not read from HENRY.video.fps.
fps  = 30
# The same lazy video object is passed as both data and targets. The third
# positional argument (fps * 2) is presumably the window length in frames —
# confirm against the TimeseriesGenerator signature.
gen  = tf.keras.preprocessing.sequence.TimeseriesGenerator(HENRY, HENRY, fps * 2, sampling_rate=2, stride=fps)
# Fetching a batch indexes the data with a slice
# (data[row - length:row:sampling_rate]), which Video.__getitem__ cannot handle;
# this is the line that raises the TypeError in the traceback that follows.
X, y = gen[0]
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-37-a7b22e584018> in <module>
----> 1 X, y = gen[0]

~\.conda\envs\ml\lib\site-packages\keras_preprocessing\sequence.py in __getitem__(self, index)
    370                                     self.stride, self.end_index + 1), self.stride)
    371 
--> 372         samples = np.array([self.data[row - self.length:row:self.sampling_rate]
    373                             for row in rows])
    374         targets = np.array([self.targets[row] for row in rows])

~\.conda\envs\ml\lib\site-packages\keras_preprocessing\sequence.py in <listcomp>(.0)
    370                                     self.stride, self.end_index + 1), self.stride)
    371 
--> 372         samples = np.array([self.data[row - self.length:row:self.sampling_rate]
    373                             for row in rows])
    374         targets = np.array([self.targets[row] for row in rows])

<ipython-input-2-40570a429d12> in __getitem__(self, frame_num)
     13 
     14     def __getitem__(self,frame_num):
---> 15         frame   = self.video.get_frame(frame_num/self.video.fps)
     16         return frame
     17 

TypeError: unsupported operand type(s) for /: 'slice' and 'float'

我想用 1 * FPS 帧(1 秒)训练我的模型来预测 1 * FPS 帧(1 秒),并期望得到这样的结果

X[0] = array(['frame[000]', 'frame[002]', 'frame[004]', 'frame[006]',
       'frame[008]', 'frame[010]', 'frame[012]', 'frame[014]',
       'frame[016]', 'frame[018]', 'frame[020]', 'frame[022]',
       'frame[024]', 'frame[026]', 'frame[028]', 'frame[030]',
       'frame[032]', 'frame[034]', 'frame[036]', 'frame[038]',
       'frame[040]', 'frame[042]', 'frame[044]', 'frame[046]',
       'frame[048]', 'frame[050]', 'frame[052]', 'frame[054]',
       'frame[056]', 'frame[058]'])

y[0] = array(['frame[060]', 'frame[062]', 'frame[064]', 'frame[066]',
       'frame[068]', 'frame[070]', 'frame[072]', 'frame[074]',
       'frame[076]', 'frame[078]', 'frame[080]', 'frame[082]',
       'frame[084]', 'frame[086]', 'frame[088]', 'frame[090]',
       'frame[092]', 'frame[094]', 'frame[096]', 'frame[098]',
       'frame[100]', 'frame[102]', 'frame[104]', 'frame[106]',
       'frame[108]', 'frame[110]', 'frame[112]', 'frame[114]',
       'frame[116]', 'frame[118]'])

如何创建一个生成器来从我的视频中提取 (data, target) =(1 seconds, 1 seconds) 帧?

标签: python tensorflow lstm moviepy

解决方案


keras 内部运行的列表推导式 samples = np.array([self.data[row - self.length:row:self.sampling_rate] for row in rows]) 会把一个 slice 对象传给您的 __getitem__。因此您的 __getitem__ 必须同时处理 slice 对象和整数索引(假设您想以这种方式访问您的数据)。

我不确定这是否会按照您的意愿工作,但它应该为您提供一个良好的起点。

from pathlib import Path
from math import ceil
import time


class VideoFileClip:
    """Minimal stand-in for a video clip so the example runs without a file.

    It exposes only what the Video wrapper reads: ``duration``, ``fps`` and
    ``get_frame``.
    """

    def __init__(self, path, **kwargs):
        """Record *path*; keyword arguments are accepted and ignored."""
        # Fixed placeholder values: 100 for duration and 10 for fps.
        self.duration, self.fps = 100, 10
        self.path = Path(path)

    def get_frame(self, num):
        """Return the stub itself in place of frame data; *num* is unused."""
        return self

class Video:
    """Sequence-style access to the frames of a video clip.

    Supports ``len()``, integer indexing (including negative indices),
    slicing and iteration. Each frame is fetched individually through the
    wrapped clip's ``get_frame``.
    """

    def __init__(self, path, **kwargs):
        """Open *path* as a clip; extra keyword arguments go to VideoFileClip."""
        self.path = Path(path)
        self.video = VideoFileClip(str(path), **kwargs)

    def __repr__(self):
        """Return ``<HH:MM:SS - file name>`` built from the clip duration."""
        duration = time.strftime('%H:%M:%S', time.gmtime(self.video.duration))
        return f"<{duration} - {self.path.name}>"

    def __len__(self):
        """Return the frame count: duration times fps, rounded up."""
        return ceil(self.video.duration * self.video.fps)

    def _frame_at(self, index):
        """Fetch the frame whose timestamp is ``index / fps``."""
        return self.video.get_frame(index / self.video.fps)

    def __getitem__(self, key):
        """Return one frame for a numeric key, or a list of frames for a slice.

        Args:
            key: A frame number (negative values count from the end) or a
                slice of frame numbers.

        Returns:
            The frame returned by ``get_frame`` for a numeric key; a list of
            such frames, in slice order, for a slice key.

        Raises:
            IndexError: If a numeric key falls outside ``[-len, len)``.
        """
        if isinstance(key, slice):
            # Build a real list rather than a generator: callers such as
            # np.array([data[a:b:c], ...]) would otherwise store generator
            # objects instead of the frames themselves.
            return [self._frame_at(i) for i in range(*key.indices(len(self)))]

        length = len(self)
        # Follow the usual sequence convention for negative indices.
        index = key + length if key < 0 else key
        if not 0 <= index < length:
            raise IndexError(
                f"frame index {key} out of range for video with {length} frames"
            )
        return self._frame_at(index)

    def __iter__(self):
        """Yield every frame in order, from frame 0 to the last frame."""
        for frame_num in range(len(self)):
            yield self._frame_at(frame_num)
# Usage example; "path" is a placeholder file name.
vid = Video("path")
# An integer key selects a single frame.
vid[0]
# A slice key takes the slice branch of __getitem__ and covers frames 0 through 99.
vid[0:100]

推荐阅读