python - 对大型视频使用 TimeseriesGenerator 提取帧
问题描述
我想训练一个多对多 LSTM 模型来预测舞蹈动作。我正在使用一个相对较大的视频,我的电脑无法处理提取视频中的所有帧。我用moviepy创建了一个自定义类,通过使用给定的帧号来提取帧。
from moviepy.video.io.VideoFileClip import VideoFileClip
from matplotlib import pyplot as plt
from pathlib import Path
from math import ceil
import numpy as np
import time
class Video:
    """Frame-addressable wrapper around a ``VideoFileClip``.

    Frames are fetched one at a time by frame number through
    ``VideoFileClip.get_frame``, so the full video is never loaded at once.

    Note:
        ``__getitem__`` only accepts a single frame number.  A ``slice`` key
        raises ``TypeError`` because the key is divided by the clip's fps.
    """

    def __init__(self, path, **kwargs):
        """Open the clip located at *path*.

        Args:
            path: Location of the video file (``str`` or ``pathlib.Path``).
            **kwargs: Passed through unchanged to ``VideoFileClip``.
        """
        # Normalise to Path so __repr__ can rely on ``.name`` even when the
        # caller passed a plain string.
        self.path = Path(path)
        self.video = VideoFileClip(str(path), **kwargs)

    def __repr__(self):
        """Return ``<HH:MM:SS - filename>`` built from the clip duration."""
        duration = time.strftime('%H:%M:%S', time.gmtime(self.video.duration))
        return f"<{duration} - {self.path.name}>"

    def __len__(self):
        """Return the frame count: duration times fps, rounded up."""
        return ceil(self.video.duration * self.video.fps)

    def __getitem__(self, frame_num):
        """Return the frame with number *frame_num*.

        The frame number is converted to a timestamp in seconds before it is
        handed to ``get_frame``.
        """
        return self.video.get_frame(frame_num / self.video.fps)

    def __iter__(self):
        """Yield every frame in order, from frame 0 to ``len(self) - 1``."""
        for frame_num in range(len(self)):
            yield self[frame_num]
这个自定义类能够按给定的帧号提取单个帧。
# Path to the source video used throughout the examples.
PATH = Path("data/HenryStickmin.mp4")
# Open the clip; audio=False is forwarded to VideoFileClip through **kwargs.
HENRY = Video(PATH, audio=False)
<00:59:54 - HenryStickmin.mp4>
# Draw four random frame numbers and display the frames in a 2x2 grid.
frame_nums = np.random.randint(0, len(HENRY), 4)
plt.figure(figsize=(21, 13))
# Subplot positions are 1-based, so enumerate from 1 instead of adding an
# offset to the packed 3-digit form (221, 222, ...).
for position, frame_num in enumerate(frame_nums, start=1):
    plt.subplot(2, 2, position)
    plt.imshow(HENRY[frame_num])
    plt.axis('off')
    plt.title(f'Frame No: {frame_num}', fontweight='bold')
plt.show()
我的下一个目标是创建时间序列数据集,但出现此错误
import tensorflow as tf
# Value reused below for both the window length (fps * 2) and the stride.
fps = 30
# HENRY is passed as both data and targets; the third positional argument is
# the window length, sampled every 2nd row, with windows starting fps rows apart.
gen = tf.keras.preprocessing.sequence.TimeseriesGenerator(HENRY, HENRY, fps * 2, sampling_rate=2, stride=fps)
# Fetching a batch slices the data (data[row - length:row:sampling_rate]).
# Video.__getitem__ divides its key by the fps, so the slice key raises the
# TypeError shown in the traceback below.
X, y = gen[0]
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-37-a7b22e584018> in <module>
----> 1 X, y = gen[0]
~\.conda\envs\ml\lib\site-packages\keras_preprocessing\sequence.py in __getitem__(self, index)
370 self.stride, self.end_index + 1), self.stride)
371
--> 372 samples = np.array([self.data[row - self.length:row:self.sampling_rate]
373 for row in rows])
374 targets = np.array([self.targets[row] for row in rows])
~\.conda\envs\ml\lib\site-packages\keras_preprocessing\sequence.py in <listcomp>(.0)
370 self.stride, self.end_index + 1), self.stride)
371
--> 372 samples = np.array([self.data[row - self.length:row:self.sampling_rate]
373 for row in rows])
374 targets = np.array([self.targets[row] for row in rows])
<ipython-input-2-40570a429d12> in __getitem__(self, frame_num)
13
14 def __getitem__(self,frame_num):
---> 15 frame = self.video.get_frame(frame_num/self.video.fps)
16 return frame
17
TypeError: unsupported operand type(s) for /: 'slice' and 'float'
我想用 1 * FPS 帧(1 秒)训练我的模型来预测 1 * FPS 帧(1 秒),并期望得到这样的结果
X[0] = array(['frame[000]', 'frame[002]', 'frame[004]', 'frame[006]',
'frame[008]', 'frame[010]', 'frame[012]', 'frame[014]',
'frame[016]', 'frame[018]', 'frame[020]', 'frame[022]',
'frame[024]', 'frame[026]', 'frame[028]', 'frame[030]',
'frame[032]', 'frame[034]', 'frame[036]', 'frame[038]',
'frame[040]', 'frame[042]', 'frame[044]', 'frame[046]',
'frame[048]', 'frame[050]', 'frame[052]', 'frame[054]',
'frame[056]', 'frame[058]'])
y[0] = array(['frame[060]', 'frame[062]', 'frame[064]', 'frame[066]',
'frame[068]', 'frame[070]', 'frame[072]', 'frame[074]',
'frame[076]', 'frame[078]', 'frame[080]', 'frame[082]',
'frame[084]', 'frame[086]', 'frame[088]', 'frame[090]',
'frame[092]', 'frame[094]', 'frame[096]', 'frame[098]',
'frame[100]', 'frame[102]', 'frame[104]', 'frame[106]',
'frame[108]', 'frame[110]', 'frame[112]', 'frame[114]',
'frame[116]', 'frame[118]'])
如何创建一个生成器来从我的视频中提取 (data, target) =(1 seconds, 1 seconds) 帧?
解决方案
keras 执行的列表推导式 `samples = np.array([self.data[row - self.length:row:self.sampling_rate] for row in rows])` 会把一个 `slice` 对象传给您的 `__getitem__`。您必须同时处理 `slice` 对象和整数索引(假设您想以这种方式访问您的数据)。
我不确定这是否会按照您的意愿工作,但它应该为您提供一个良好的起点。
from pathlib import Path
from math import ceil
import time
class VideoFileClip:
    """Minimal stub of a video clip so the example runs without a video file.

    It exposes the attributes and method that ``Video`` reads: ``duration``,
    ``fps`` and ``get_frame``.
    """

    def __init__(self, path, **kwargs):
        """Record *path* and set fixed clip properties.

        Extra keyword arguments are accepted and ignored.
        """
        # Fixed values: 100 * 10 gives a 1000-frame clip.
        self.fps = 10
        self.duration = 100
        self.path = Path(path)

    def get_frame(self, num):
        """Return this stub itself in place of real frame data."""
        return self
class Video:
    """Sequence-style view over the frames of a video clip.

    Supports ``len()``, iteration, integer indexing (including negative
    indices) and slicing.  Frames are fetched on demand through
    ``VideoFileClip.get_frame``.
    """

    def __init__(self, path, **kwargs):
        """Open the clip located at *path*.

        Args:
            path: Location of the video file (``str`` or ``pathlib.Path``).
            **kwargs: Passed through unchanged to ``VideoFileClip``.
        """
        self.path = Path(path)
        self.video = VideoFileClip(str(path), **kwargs)

    def __repr__(self):
        """Return ``<HH:MM:SS - filename>`` built from the clip duration."""
        duration = time.strftime('%H:%M:%S', time.gmtime(self.video.duration))
        return f"<{duration} - {self.path.name}>"

    def __len__(self):
        """Return the frame count: duration times fps, rounded up."""
        return ceil(self.video.duration * self.video.fps)

    def __getitem__(self, key):
        """Return one frame for an integer key, or a list of frames for a slice.

        Args:
            key: Frame number (negative values count from the end) or a
                ``slice`` of frame numbers.

        Returns:
            The value of ``get_frame`` for a single frame number, or a list
            of such values for a slice.

        Raises:
            IndexError: If an integer key falls outside ``range(len(self))``.
        """
        if isinstance(key, slice):
            # Materialise the frames: callers such as
            # np.array([data[a:b:c] for ...]) need real sequences; a lazy
            # generator would be stored as an opaque object instead.
            return [self[i] for i in range(*key.indices(len(self)))]

        total = len(self)
        index = key + total if key < 0 else key
        if not 0 <= index < total:
            raise IndexError(f"frame index {key} out of range for {total} frames")
        # get_frame expects a timestamp in seconds, not a frame number.
        return self.video.get_frame(index / self.video.fps)

    def __iter__(self):
        """Yield every frame in order, from frame 0 to ``len(self) - 1``."""
        for frame_num in range(len(self)):
            yield self[frame_num]
# Build a Video on top of the stub clip; "path" is only a placeholder string.
vid = Video("path")
# Integer key: a single frame lookup.
vid[0]
# Slice key: handled by the isinstance(key, slice) branch of __getitem__.
vid[0:100]
推荐阅读
- excel - 减去和添加库存 (SUMIF)
- laravel-5 - 雄辩的`isDirty()`和`getChanges()`之间的不一致
- r - TidyR 跨多个列获取唯一值并转换为列名?
- javascript - eCharts 堆积条形图 - Y 轴
- swift - Swift Combine:如何创建自定义接收器?
- vue.js - 嵌套路由未在路由器视图中呈现
- python - 如何使用 docx 从 python 列表中添加图像?
- python - 在列表中找到最大的重复数字序列
- .net-core - Signal-R 在接收到先前的调用数据后调用另一个服务器方法
- java - 为什么我的程序处理字符编码不正确?