python - 在 TensorFlow 数据集(tf.data.Dataset)中单独操作每个批次
问题描述
考虑下面的代码:
import tensorflow as tf
import numpy as np
# Toy feature matrix: 12 rows (time steps) by 5 columns (features).
simple_data_samples = np.array(
    [
        [1, 1, 1, 7, -1],
        [2, -2, 2, -2, -2],
        [3, 3, 3, -3, -3],
        [-4, 4, 4, -4, -4],
        [5, 5, 5, -5, -5],
        [6, 6, 6, -4, -1],
        [7, 7, 8, -7, -70],
        [8, 8, 8, -8, -8],
        [9, 4, 9, -9, -9],
        [10, 10, 10, -10, -10],
        [11, 5, 11, -11, -11],
        [12, 12, 12, -12, -12],
    ]
)
def single(ds):
    """Print every element obtained by iterating over ``ds``.

    Args:
        ds: Any iterable. In this example it is the batched dataset, so
            each printed element is one batch.
    """
    for x in ds:
        print(x)
def timeseries_dataset_multistep_combined(
    features,
    label_slice,
    input_sequence_length,
    output_sequence_length,
    sequence_stride,
    batch_size,
):
    """Build a batched dataset of sliding windows over ``features``.

    Each window covers ``input_sequence_length + output_sequence_length``
    consecutive rows, i.e. the input part and the output part are kept
    together in one combined window.

    Args:
        features: Array of time steps to cut into windows.
        label_slice: Not used by this function; kept in the signature so
            existing calls keep working.
        input_sequence_length: Number of rows in the input part of a window.
        output_sequence_length: Number of rows in the output part of a window.
        sequence_stride: Forwarded as ``sequence_stride`` to
            ``timeseries_dataset_from_array``.
        batch_size: Forwarded as ``batch_size``.

    Returns:
        The dataset produced by ``timeseries_dataset_from_array``.
    """
    feature_ds = tf.keras.preprocessing.timeseries_dataset_from_array(
        features,
        None,  # no separate targets are passed
        sequence_length=input_sequence_length + output_sequence_length,
        sequence_stride=sequence_stride,
        batch_size=batch_size,
        shuffle=False,
    )
    return feature_ds
# Windows of 4 + 2 = 6 rows, grouped two windows per batch.
ds = timeseries_dataset_multistep_combined(
    simple_data_samples,
    slice(None),
    input_sequence_length=4,
    output_sequence_length=2,
    sequence_stride=2,
    batch_size=2,
)
# Print each batch of the dataset.
single(ds)
此代码创建以下批次输出:
tf.Tensor(
[[[ 1 1 1 7 -1]
[ 2 -2 2 -2 -2]
[ 3 3 3 -3 -3]
[ -4 4 4 -4 -4]
[ 5 5 5 -5 -5]
[ 6 6 6 -4 -1]]
[[ 3 3 3 -3 -3]
[ -4 4 4 -4 -4]
[ 5 5 5 -5 -5]
[ 6 6 6 -4 -1]
[ 7 7 8 -7 -70]
[ 8 8 8 -8 -8]]], shape=(2, 6, 5), dtype=int64)
tf.Tensor(
[[[ 5 5 5 -5 -5]
[ 6 6 6 -4 -1]
[ 7 7 8 -7 -70]
[ 8 8 8 -8 -8]
[ 9 4 9 -9 -9]
[ 10 10 10 -10 -10]]
[[ 7 7 8 -7 -70]
[ 8 8 8 -8 -8]
[ 9 4 9 -9 -9]
[ 10 10 10 -10 -10]
[ 11 5 11 -11 -11]
[ 12 12 12 -12 -12]]], shape=(2, 6, 5), dtype=int64)
我想单独操作每个批次。为此,我想分别从每个批次中提取最大值。这可以通过以下代码完成:
def timeseries_dataset_multistep_combined(
    features,
    label_slice,
    input_sequence_length,
    output_sequence_length,
    sequence_stride,
    batch_size,
):
    """Build a dataset holding, per window, the max of the last feature column.

    ``features`` is first cut into batched sliding windows of
    ``input_sequence_length + output_sequence_length`` rows; every batch is
    then replaced by the per-window maximum of its last column.

    Args:
        features: Array of time steps to cut into windows.
        label_slice: Not used by this function; kept in the signature so
            existing calls keep working.
        input_sequence_length: Number of rows in the input part of a window.
        output_sequence_length: Number of rows in the output part of a window.
        sequence_stride: Forwarded as ``sequence_stride`` to
            ``timeseries_dataset_from_array``.
        batch_size: Forwarded as ``batch_size``.

    Returns:
        The mapped dataset; each element has one maximum per window
        (shape ``(2, 1)`` per batch in the example output).
    """
    feature_ds = tf.keras.preprocessing.timeseries_dataset_from_array(
        features,
        None,  # no separate targets are passed
        sequence_length=input_sequence_length + output_sequence_length,
        sequence_stride=sequence_stride,
        batch_size=batch_size,
        shuffle=False,
    )

    def extract_max(x):
        """Return the max of the last column of each window in batch ``x``."""
        # x[:, :, -1] keeps only the last column; reducing over axis 1 with
        # keepdims=True leaves one value per window along a size-1 axis.
        return tf.reduce_max(x[:, :, -1], axis=1, keepdims=True)

    feature_ds = feature_ds.map(extract_max)
    return feature_ds
# Same window settings as before; the dataset now yields per-window maxima.
ds = timeseries_dataset_multistep_combined(
    simple_data_samples,
    slice(None),
    input_sequence_length=4,
    output_sequence_length=2,
    sequence_stride=2,
    batch_size=2,
)
# Print each element of the mapped dataset.
single(ds)
由于我一共创建了四个序列窗口(两个批次,每个批次包含两个窗口),我希望得到四个最大值,如下所示:
tf.Tensor(
[[-1]
[-1]], shape=(2, 1), dtype=int64)
tf.Tensor(
[[-1]
[-8]], shape=(2, 1), dtype=int64)
现在我想将每个最大值添加到相应的批次中。例如,对于第一批输出,我将添加第一个最大值 (-1) 并期望以下输出:
[[[ 1 1 1 7 -1]
[ 2 -2 2 -2 -2]
[ 3 3 3 -3 -3]
[ -4 4 4 -4 -4]
[ 5 5 5 -5 -5]
[ 6 6 6 -4 -1]] +(-1) ###first max value =
[[[ 0 0 0 6 -2]
[ 1 -3 1 -3 -3]
[ 2 2 2 -4 -4]
[ -5 3 3 -5 -5]
[ 4 4 4 -6 -6]
[ 5 5 5 -5 -2]]
我将如何编码?
解决方案
你可以使用 `tf.repeat`,并尝试如下做法:
def broad_cast_and_merge(ds1, ds2):
    """Add the per-window value in ``ds2`` to every entry of that window in ``ds1``.

    Args:
        ds1: Batch of windows; indexed as a rank-3 tensor here
            (shape ``(2, 6, 5)`` in the example).
        ds2: One value per window of ``ds1`` (shape ``(2, 1)`` in the
            example).

    Returns:
        A tensor with the shape of ``ds1`` holding the element-wise sum.
    """
    ds1_shape = tf.shape(ds1)
    # Without an ``axis`` argument tf.repeat works on the flattened input and
    # repeats each value consecutively, so repeating every value
    # dim1 * dim2 times and reshaping fills each window with its own value.
    ds2 = tf.reshape(
        tf.repeat(ds2, repeats=ds1_shape[1] * ds1_shape[2]),
        ds1_shape,
    )
    return ds1 + ds2
# NOTE(review): time_series_ds and max_ds are not defined in the snippets
# shown here - presumably the windowed dataset before mapping and the
# dataset of per-window maxima; confirm before running.
final_ds = tf.data.Dataset.zip(
    (time_series_ds, max_ds)
).map(broad_cast_and_merge)
# Print each merged batch.
single(final_ds)
或者使用 `tf.broadcast_to`:
def broad_cast_and_merge(ds1, ds2):
    """Add the per-window value in ``ds2`` to every entry of that window in ``ds1``.

    Args:
        ds1: Batch of windows (shape ``(2, 6, 5)`` in the example).
        ds2: One value per window of ``ds1`` (shape ``(2, 1)`` in the
            example).

    Returns:
        A tensor with the shape of ``ds1`` holding the element-wise sum.
    """
    # Append a trailing size-1 axis so ds2 has the same rank as ds1 in the
    # example and can be broadcast to ds1's shape.
    ds2 = tf.expand_dims(ds2, axis=-1)
    ds2 = tf.broadcast_to(ds2, tf.shape(ds1))
    return ds1 + ds2
# NOTE(review): time_series_ds and max_ds are not defined in the snippets
# shown here - presumably the windowed dataset before mapping and the
# dataset of per-window maxima; confirm before running.
final_ds = tf.data.Dataset.zip(
    (time_series_ds, max_ds)
).map(broad_cast_and_merge)
# Print each merged batch.
single(final_ds)
两者都会给你相同的结果:
tf.Tensor(
[[[ 0 0 0 6 -2]
[ 1 -3 1 -3 -3]
[ 2 2 2 -4 -4]
[ -5 3 3 -5 -5]
[ 4 4 4 -6 -6]
[ 5 5 5 -5 -2]]
[[ 2 2 2 -4 -4]
[ -5 3 3 -5 -5]
[ 4 4 4 -6 -6]
[ 5 5 5 -5 -2]
[ 6 6 7 -8 -71]
[ 7 7 7 -9 -9]]], shape=(2, 6, 5), dtype=int64)
tf.Tensor(
[[[ 4 4 4 -6 -6]
[ 5 5 5 -5 -2]
[ 6 6 7 -8 -71]
[ 7 7 7 -9 -9]
[ 8 3 8 -10 -10]
[ 9 9 9 -11 -11]]
[[ -1 -1 0 -15 -78]
[ 0 0 0 -16 -16]
[ 1 -4 1 -17 -17]
[ 2 2 2 -18 -18]
[ 3 -3 3 -19 -19]
[ 4 4 4 -20 -20]]], shape=(2, 6, 5), dtype=int64)
如前所述,您也可以跳过显式的广播步骤,只使用 `tf.expand_dims`:加法运算会自动完成广播。
推荐阅读
- angular - 角度内插值未在订阅时更新
- google-apps-script - 使用 App 脚本复制 Google 表格,保留评论
- yii2 - yii2 树枝子扩展,父布局输出两次
- image - python如何支持在画布或标签小部件上显示图像?
- ubuntu - Ubuntu 18.04 升级和 Cassandra 连接错误
- maple - 是否可以向 Maple int 添加集成规则?
- java - 为什么 OrElseGet 似乎没有被调用
- html - 带有 phpmailer 的希伯来语 rtl 中的联系表格
- python - 需要 Microsoft Visual C++ 14.0。下载python包时出错
- machine-learning - 学习率对神经网络的影响