python - 警告:张量流:在最小化损失和转换 Keras 模型的问题时,变量不存在梯度
问题描述
我创建了一个实现自定义因果 Conv1D 的 Keras 自定义层。为此,我简单地使用了 Conv 和 Conv1D 类(它们位于 tensorflow/python/keras/layers/convolutional.py 中)并修改了 Conv。我将向您展示如何将这一层与一个非常愚蠢的示例数据集一起使用,该示例数据集的特点是只有两个训练样本(给定一个有序的数字序列作为训练样本,模型应该预测它的下一个序列:例如,如果我've [0, 1, 2] 输出将是 [3, 4, 5])。但是我收到以下警告和错误:
Epoch 1/10
WARNING:tensorflow:Gradients do not exist for variables ['MyConv/kernel:0'] when minimizing the loss.
WARNING:tensorflow:Gradients do not exist for variables ['MyConv/kernel:0'] when minimizing the loss.
2/2 [==============================] - 1s 1ms/step - loss: 113.0949 - accuracy: 1.0000
Epoch 2/10
2/2 [==============================] - 0s 1ms/step - loss: 117.4004 - accuracy: 1.0000
Epoch 3/10
2/2 [==============================] - 0s 1ms/step - loss: 124.0515 - accuracy: 1.0000
Epoch 4/10
2/2 [==============================] - 0s 994us/step - loss: 132.9754 - accuracy: 1.0000
Epoch 5/10
2/2 [==============================] - 0s 1ms/step - loss: 154.4105 - accuracy: 1.0000
Epoch 6/10
2/2 [==============================] - 0s 4ms/step - loss: 157.2047 - accuracy: 1.0000
Epoch 7/10
2/2 [==============================] - 0s 981us/step - loss: 184.0751 - accuracy: 1.0000
Epoch 8/10
2/2 [==============================] - 0s 902us/step - loss: 201.2888 - accuracy: 1.0000
Epoch 9/10
2/2 [==============================] - 0s 888us/step - loss: 219.6019 - accuracy: 1.0000
Epoch 10/10
2/2 [==============================] - 0s 5ms/step - loss: 238.6077 - accuracy: 1.0000
ValueError: Input 0 of node model/MyConv/model/MyConv/strided_slice/_assign was passed float from model/MyConv/ReadVariableOp/resource:0 incompatible with expected resource.
示例代码是这样的:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import six
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input,Activation,Flatten,Dense
from tensorflow.python.eager import context
from tensorflow.python.framework import tensor_shape
from tensorflow.python.keras import activations
from tensorflow.python.keras import backend
from tensorflow.python.keras import constraints
from tensorflow.python.keras import initializers
from tensorflow.python.keras import regularizers
from tensorflow.python.keras.engine.base_layer import Layer
from tensorflow.python.keras.engine.input_spec import InputSpec
# imports for backwards namespace compatibility
# pylint: disable=unused-import
# pylint: enable=unused-import
from tensorflow.python.keras.utils import conv_utils
from tensorflow.python.keras.utils import tf_utils
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import nn_ops
from tensorflow.python.util.tf_export import keras_export
# pylint: disable=g-classes-have-attributes
z = None
copy = None
outputs = None
class Myconv(Layer):
def __init__(self,
rank,
filters,
kernel_size,
strides=1,
padding='valid',
data_format=None,
dilation_rate=1,
groups=1,
activation=None,
use_bias=True,
kernel_initializer='glorot_uniform',
bias_initializer='zeros',
kernel_regularizer=None,
bias_regularizer=None,
activity_regularizer=None,
kernel_constraint=None,
bias_constraint=None,
trainable=True,
name=None,
conv_op=None,
**kwargs):
super(Myconv, self).__init__(
trainable=trainable,
name=name,
activity_regularizer=regularizers.get(activity_regularizer),
**kwargs)
self.rank = rank
if isinstance(filters, float):
filters = int(filters)
self.filters = filters
self.groups = groups or 1
self.kernel_size = conv_utils.normalize_tuple(
kernel_size, rank, 'kernel_size')
self.strides = conv_utils.normalize_tuple(strides, rank, 'strides')
self.padding = conv_utils.normalize_padding(padding)
self.data_format = conv_utils.normalize_data_format(data_format)
self.dilation_rate = conv_utils.normalize_tuple(
dilation_rate, rank, 'dilation_rate')
self.activation = activations.get(activation)
self.use_bias = use_bias
self.kernel_initializer = initializers.get(kernel_initializer)
self.bias_initializer = initializers.get(bias_initializer)
self.kernel_regularizer = regularizers.get(kernel_regularizer)
self.bias_regularizer = regularizers.get(bias_regularizer)
self.kernel_constraint = constraints.get(kernel_constraint)
self.bias_constraint = constraints.get(bias_constraint)
self.input_spec = InputSpec(min_ndim=self.rank + 2)
self._validate_init()
self._is_causal = self.padding == 'causal'
self._channels_first = self.data_format == 'channels_first'
self._tf_data_format = conv_utils.convert_data_format(
self.data_format, self.rank + 2)
def _validate_init(self):
if self.filters is not None and self.filters % self.groups != 0:
raise ValueError(
'The number of filters must be evenly divisible by the number of '
'groups. Received: groups={}, filters={}'.format(
self.groups, self.filters))
if not all(self.kernel_size):
raise ValueError('The argument `kernel_size` cannot contain 0(s). '
'Received: %s' % (self.kernel_size,))
if (self.padding == 'causal' and not isinstance(self,
(Conv1D, SeparableConv1D))):
raise ValueError('Causal padding is only supported for `Conv1D`'
'and `SeparableConv1D`.')
def build(self, input_shape):
input_shape = tensor_shape.TensorShape(input_shape)
input_channel = self._get_input_channel(input_shape)
if input_channel % self.groups != 0:
raise ValueError(
'The number of input channels must be evenly divisible by the number '
'of groups. Received groups={}, but the input has {} channels '
'(full input shape is {}).'.format(self.groups, input_channel,
input_shape))
kernel_shape = self.kernel_size + (input_channel // self.groups,
self.filters)
self.kernel = self.add_weight(
name='kernel',
shape=kernel_shape,
initializer=self.kernel_initializer,
regularizer=self.kernel_regularizer,
constraint=self.kernel_constraint,
trainable=True,
dtype=self.dtype)
if self.use_bias:
self.bias = self.add_weight(
name='bias',
shape=(self.filters,),
initializer=self.bias_initializer,
regularizer=self.bias_regularizer,
constraint=self.bias_constraint,
trainable=True,
dtype=self.dtype)
else:
self.bias = None
channel_axis = self._get_channel_axis()
self.input_spec = InputSpec(min_ndim=self.rank + 2,
axes={channel_axis: input_channel})
# Convert Keras formats to TF native formats.
if self.padding == 'causal':
tf_padding = 'VALID' # Causal padding handled in `call`.
elif isinstance(self.padding, six.string_types):
tf_padding = self.padding.upper()
else:
tf_padding = self.padding
tf_dilations = list(self.dilation_rate)
tf_strides = list(self.strides)
tf_op_name = self.__class__.__name__
if tf_op_name == 'Myconv1D':
tf_op_name = 'conv1d' # Backwards compat.
self.built = True
def call(self, inputs):
input_shape = inputs.shape
if self._is_causal:
global z
if z is None:
z = tf.Variable(lambda : tf.zeros([1,11,1],dtype="float32"))
global copy
if copy is None:
copy = tf.Variable(lambda : tf.zeros([1,5,1],dtype="float32"))
global outputs
if outputs is None:
outputs = tf.Variable(lambda : tf.zeros([1,7,1],dtype="float32"))
for i in range(0,7,1):
z[0,4+i,0].assign(inputs[0,i,0])
dim_k = 3 # kernel dimension
dilation = 2
dim = 5
size = 7
# 1D convolution
for i in range(0,size,1):
for m in range(0,dim,1):
copy[0,m,0].assign(z[0,m+i,0])
for j in range(0,dim_k,1):
outputs[0,i,0].assign((outputs[0,i,0])+(copy[0,j*dilation,0])*(self.kernel[j,0,0]))
if self.use_bias:
output_rank = outputs.shape.rank
if self.rank == 1 and self._channels_first:
# nn.bias_add does not accept a 1D input tensor.
bias = array_ops.reshape(self.bias, (1, self.filters, 1))
outputs += bias
else:
# Handle multiple batch dimensions.
if output_rank is not None and output_rank > 2 + self.rank:
def _apply_fn(o):
return nn.bias_add(o, self.bias, data_format=self._tf_data_format)
outputs = nn_ops.squeeze_batch_dims(
outputs, _apply_fn, inner_rank=self.rank + 1)
else:
outputs = nn.bias_add(
outputs, self.bias, data_format=self._tf_data_format)
if not context.executing_eagerly():
# Infer the static output shape:
out_shape = self.compute_output_shape(input_shape)
outputs.set_shape(out_shape)
if self.activation is not None:
return self.activation(outputs)
return outputs
def _spatial_output_shape(self, spatial_input_shape):
return [
conv_utils.conv_output_length(
length,
self.kernel_size[i],
padding=self.padding,
stride=self.strides[i],
dilation=self.dilation_rate[i])
for i, length in enumerate(spatial_input_shape)
]
def compute_output_shape(self, input_shape):
input_shape = tensor_shape.TensorShape(input_shape).as_list()
batch_rank = len(input_shape) - self.rank - 1
if self.data_format == 'channels_last':
return tensor_shape.TensorShape(
input_shape[:batch_rank]
+ self._spatial_output_shape(input_shape[batch_rank:-1])
+ [self.filters])
else:
return tensor_shape.TensorShape(
input_shape[:batch_rank] + [self.filters] +
self._spatial_output_shape(input_shape[batch_rank + 1:]))
def _recreate_conv_op(self, inputs): # pylint: disable=unused-argument
return False
def get_config(self):
config = {
'filters':
self.filters,
'kernel_size':
self.kernel_size,
'strides':
self.strides,
'padding':
self.padding,
'data_format':
self.data_format,
'dilation_rate':
self.dilation_rate,
'groups':
self.groups,
'activation':
activations.serialize(self.activation),
'use_bias':
self.use_bias,
'kernel_initializer':
initializers.serialize(self.kernel_initializer),
'bias_initializer':
initializers.serialize(self.bias_initializer),
'kernel_regularizer':
regularizers.serialize(self.kernel_regularizer),
'bias_regularizer':
regularizers.serialize(self.bias_regularizer),
'activity_regularizer':
regularizers.serialize(self.activity_regularizer),
'kernel_constraint':
constraints.serialize(self.kernel_constraint),
'bias_constraint':
constraints.serialize(self.bias_constraint)
}
base_config = super(Myconv, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
def _compute_causal_padding(self, inputs):
"""Calculates padding for 'causal' option for 1-d conv layers."""
left_pad = self.dilation_rate[0] * (self.kernel_size[0] - 1)
if getattr(inputs.shape, 'ndims', None) is None:
batch_rank = 1
else:
batch_rank = len(inputs.shape) - 2
if self.data_format == 'channels_last':
causal_padding = [[0, 0]] * batch_rank + [[left_pad, 0], [0, 0]]
else:
causal_padding = [[0, 0]] * batch_rank + [[0, 0], [left_pad, 0]]
return causal_padding
def _get_channel_axis(self):
if self.data_format == 'channels_first':
return -1 - self.rank
else:
return -1
def _get_input_channel(self, input_shape):
channel_axis = self._get_channel_axis()
if input_shape.dims[channel_axis].value is None:
raise ValueError('The channel dimension of the inputs '
'should be defined. Found `None`.')
return int(input_shape[channel_axis])
def _get_padding_op(self):
if self.padding == 'causal':
op_padding = 'valid'
else:
op_padding = self.padding
if not isinstance(op_padding, (list, tuple)):
op_padding = op_padding.upper()
return op_padding
@keras_export('keras.layers.Myconv1D', 'keras.layers.Myconvol1D')
class Myconv1D(Myconv):
def __init__(self,
filters,
kernel_size,
strides=1,
padding='valid',
data_format='channels_last',
dilation_rate=1,
groups=1,
activation=None,
use_bias=True,
kernel_initializer='glorot_uniform',
bias_initializer='zeros',
kernel_regularizer=None,
bias_regularizer=None,
activity_regularizer=None,
kernel_constraint=None,
bias_constraint=None,
**kwargs):
super(Myconvol, self).__init__(
rank=1,
filters=filters,
kernel_size=kernel_size,
strides=strides,
padding=padding,
data_format=data_format,
dilation_rate=dilation_rate,
groups=groups,
activation=activations.get(activation),
use_bias=use_bias,
kernel_initializer=initializers.get(kernel_initializer),
bias_initializer=initializers.get(bias_initializer),
kernel_regularizer=regularizers.get(kernel_regularizer),
bias_regularizer=regularizers.get(bias_regularizer),
activity_regularizer=regularizers.get(activity_regularizer),
kernel_constraint=constraints.get(kernel_constraint),
bias_constraint=constraints.get(bias_constraint),
**kwargs)
# train the model
def build_model(x_train, y_train):
train1 = x_train.shape[1]
train2 = x_train.shape[2]
verbose, epochs, batch_size = 1, 10, 1
# define model
inp=Input(shape=(train1, train2), batch_size=1)
r=Myconv1D(filters=1,kernel_size=3,padding='causal',dilation_rate=1,use_bias=False,name="MyConv")(inp)
r = Flatten()(r)
r = Dense(10, activation='relu')(r)
r = Dense(7)(r)
model = Model(inputs=inp, outputs=r)
model.summary()
model.compile(loss='mse', optimizer='adam', metrics=['accuracy'])
# fit network
model.fit(x_train, y_train, epochs=epochs, batch_size=batch_size, verbose=verbose)
return model
x_train = tf.constant([
[[0],
[1],
[2],
[3],
[4],
[5],
[6]],
[[1],
[2],
[3],
[4],
[5],
[6],
[7]],])
y_train = tf.constant(
[[7, 8, 9, 10, 11, 12, 13],
[8, 9, 10, 11, 12, 13, 14]])
model = build_model(x_train, y_train)
tflite_model_name = 'tcn'
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]
tflite_model = converter.convert()
open(tflite_model_name + '.tflite', 'wb').write(tflite_model)
我这样做是因为也许以后我可以通过这种方式将模型转换为没有 Conv2D 和 expand_dims 的 TFLite(如果我尝试使用标准 Conv1D 转换模型,它会被上面的操作替换)。
解决方案
推荐阅读
- excel - 代码在 office 2010 和 2016 中有效,但在 office 2007 中无效
- jhipster - jhipster,api-first 方法,生成的 api 现在显示在 swagger-ui 中
- c# - 如何在 C# Visual Studio 中获取没有 html 标签的 email.Body?
- xamarin.forms - 图标显示在 FlyoutItem 中,但不在底部的 Tabbar 中
- arrays - 在 C 中,如何“直接”将结构复制到数组成员?
- arrays - Ansible 子元素(2.3 版)
- python - 如何使用 VCS 中的 pip 将包强制安装到站点包
- python - 重组 Pyspark 数据框:使用行元素创建新列
- pytorch - 如何更改transformers.bert的最大序列长度?
- php - 当我从 ajax 发送数据时,如何在 jQuery 中使用 DataTables 对列进行排序?