WARNING:tensorflow: Gradients do not exist for variables when minimizing the loss, and a problem converting the Keras model

Problem description

I created a custom Keras layer that implements a custom causal Conv1D. To do so, I took the Conv and Conv1D classes (located in tensorflow/python/keras/layers/convolutional.py) and modified Conv. Below I show how this layer is used with a very simple example dataset consisting of only two training samples: given an ordered sequence of numbers as input, the model should predict the sequence that follows it (for example, if the input is [0, 1, 2], the output should be [3, 4, 5]). However, I get the following warning and error:

Epoch 1/10
WARNING:tensorflow:Gradients do not exist for variables ['MyConv/kernel:0'] when minimizing the loss.
WARNING:tensorflow:Gradients do not exist for variables ['MyConv/kernel:0'] when minimizing the loss.
2/2 [==============================] - 1s 1ms/step - loss: 113.0949 - accuracy: 1.0000
Epoch 2/10
2/2 [==============================] - 0s 1ms/step - loss: 117.4004 - accuracy: 1.0000
Epoch 3/10
2/2 [==============================] - 0s 1ms/step - loss: 124.0515 - accuracy: 1.0000
Epoch 4/10
2/2 [==============================] - 0s 994us/step - loss: 132.9754 - accuracy: 1.0000
Epoch 5/10
2/2 [==============================] - 0s 1ms/step - loss: 154.4105 - accuracy: 1.0000
Epoch 6/10
2/2 [==============================] - 0s 4ms/step - loss: 157.2047 - accuracy: 1.0000
Epoch 7/10
2/2 [==============================] - 0s 981us/step - loss: 184.0751 - accuracy: 1.0000
Epoch 8/10
2/2 [==============================] - 0s 902us/step - loss: 201.2888 - accuracy: 1.0000
Epoch 9/10
2/2 [==============================] - 0s 888us/step - loss: 219.6019 - accuracy: 1.0000
Epoch 10/10
2/2 [==============================] - 0s 5ms/step - loss: 238.6077 - accuracy: 1.0000

ValueError: Input 0 of node model/MyConv/model/MyConv/strided_slice/_assign was passed float from model/MyConv/ReadVariableOp/resource:0 incompatible with expected resource.
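For context, this warning usually means that the loss is not connected to the listed variable through any differentiable operations. Writing intermediate results into tf.Variable buffers with assign() inside call, as the layer below does, is one way this can happen, because gradients do not flow through variable assignments. A minimal sketch, independent of the layer code and using hypothetical names, that reproduces the same symptom with a GradientTape:

import tensorflow as tf

kernel = tf.Variable(tf.ones([3]), name='kernel')  # plays the role of MyConv/kernel
buffer = tf.Variable(tf.zeros([3]))                # scratch buffer written via assign()

x = tf.constant([1.0, 2.0, 3.0])
with tf.GradientTape() as tape:
  # Building the result by assigning into a Variable removes the
  # differentiable path from `loss` back to `kernel`.
  for i in range(3):
    buffer[i].assign(x[i] * kernel[i])
  loss = tf.reduce_sum(buffer)

print(tape.gradient(loss, kernel))  # None -> "Gradients do not exist for variables"

In the layer below, outputs is filled in the same way, which is consistent with the warning about MyConv/kernel:0.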

The sample code is as follows:

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import six
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input,Activation,Flatten,Dense
from tensorflow.python.eager import context
from tensorflow.python.framework import tensor_shape
from tensorflow.python.keras import activations
from tensorflow.python.keras import backend
from tensorflow.python.keras import constraints
from tensorflow.python.keras import initializers
from tensorflow.python.keras import regularizers
from tensorflow.python.keras.engine.base_layer import Layer
from tensorflow.python.keras.engine.input_spec import InputSpec
# imports for backwards namespace compatibility
# pylint: disable=unused-import
# pylint: enable=unused-import
from tensorflow.python.keras.utils import conv_utils
from tensorflow.python.keras.utils import tf_utils
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import nn_ops
from tensorflow.python.util.tf_export import keras_export
# pylint: disable=g-classes-have-attributes

z = None
copy = None
outputs = None
  
class Myconv(Layer):
  """Custom convolution base layer (a modified copy of the Keras `Conv` class)."""

  def __init__(self,
               rank,
               filters,
               kernel_size,
               strides=1,
               padding='valid',
               data_format=None,
               dilation_rate=1,
               groups=1,
               activation=None,
               use_bias=True,
               kernel_initializer='glorot_uniform',
               bias_initializer='zeros',
               kernel_regularizer=None,
               bias_regularizer=None,
               activity_regularizer=None,
               kernel_constraint=None,
               bias_constraint=None,
               trainable=True,
               name=None,
               conv_op=None,
               **kwargs):
    super(Myconv, self).__init__(
        trainable=trainable,
        name=name,
        activity_regularizer=regularizers.get(activity_regularizer),
        **kwargs)
    self.rank = rank

    if isinstance(filters, float):
      filters = int(filters)
    self.filters = filters
    self.groups = groups or 1
    self.kernel_size = conv_utils.normalize_tuple(
        kernel_size, rank, 'kernel_size')
    self.strides = conv_utils.normalize_tuple(strides, rank, 'strides')
    self.padding = conv_utils.normalize_padding(padding)
    self.data_format = conv_utils.normalize_data_format(data_format)
    self.dilation_rate = conv_utils.normalize_tuple(
        dilation_rate, rank, 'dilation_rate')

    self.activation = activations.get(activation)
    self.use_bias = use_bias

    self.kernel_initializer = initializers.get(kernel_initializer)
    self.bias_initializer = initializers.get(bias_initializer)
    self.kernel_regularizer = regularizers.get(kernel_regularizer)
    self.bias_regularizer = regularizers.get(bias_regularizer)
    self.kernel_constraint = constraints.get(kernel_constraint)
    self.bias_constraint = constraints.get(bias_constraint)
    self.input_spec = InputSpec(min_ndim=self.rank + 2)

    self._validate_init()
    self._is_causal = self.padding == 'causal'
    self._channels_first = self.data_format == 'channels_first'
    self._tf_data_format = conv_utils.convert_data_format(
        self.data_format, self.rank + 2)

  def _validate_init(self):
    if self.filters is not None and self.filters % self.groups != 0:
      raise ValueError(
          'The number of filters must be evenly divisible by the number of '
          'groups. Received: groups={}, filters={}'.format(
              self.groups, self.filters))

    if not all(self.kernel_size):
      raise ValueError('The argument `kernel_size` cannot contain 0(s). '
                       'Received: %s' % (self.kernel_size,))

    if self.padding == 'causal' and not isinstance(self, Myconv1D):
      raise ValueError('Causal padding is only supported for `Myconv1D`.')

  def build(self, input_shape):
    input_shape = tensor_shape.TensorShape(input_shape)
    input_channel = self._get_input_channel(input_shape)
    if input_channel % self.groups != 0:
      raise ValueError(
          'The number of input channels must be evenly divisible by the number '
          'of groups. Received groups={}, but the input has {} channels '
          '(full input shape is {}).'.format(self.groups, input_channel,
                                             input_shape))
    kernel_shape = self.kernel_size + (input_channel // self.groups,
                                       self.filters)

    self.kernel = self.add_weight(
        name='kernel',
        shape=kernel_shape,
        initializer=self.kernel_initializer,
        regularizer=self.kernel_regularizer,
        constraint=self.kernel_constraint,
        trainable=True,
        dtype=self.dtype)
    if self.use_bias:
      self.bias = self.add_weight(
          name='bias',
          shape=(self.filters,),
          initializer=self.bias_initializer,
          regularizer=self.bias_regularizer,
          constraint=self.bias_constraint,
          trainable=True,
          dtype=self.dtype)
    else:
      self.bias = None
    channel_axis = self._get_channel_axis()
    self.input_spec = InputSpec(min_ndim=self.rank + 2,
                                axes={channel_axis: input_channel})

    # Convert Keras formats to TF native formats.
    if self.padding == 'causal':
      tf_padding = 'VALID'  # Causal padding handled in `call`.
    elif isinstance(self.padding, six.string_types):
      tf_padding = self.padding.upper()
    else:
      tf_padding = self.padding
    tf_dilations = list(self.dilation_rate)
    tf_strides = list(self.strides)

    tf_op_name = self.__class__.__name__
    if tf_op_name == 'Myconv1D':
      tf_op_name = 'conv1d'  # Backwards compat.

    self.built = True

  def call(self, inputs):
    input_shape = inputs.shape

    if self._is_causal:

      # Scratch buffers are created lazily as module-level variables rather
      # than as layer weights.
      global z
      if z is None:
        # Input length 7 plus a left padding of dilation * (kernel_size - 1) = 4.
        z = tf.Variable(lambda: tf.zeros([1, 11, 1], dtype="float32"))

      global copy
      if copy is None:
        copy = tf.Variable(lambda: tf.zeros([1, 5, 1], dtype="float32"))

      global outputs
      if outputs is None:
        outputs = tf.Variable(lambda: tf.zeros([1, 7, 1], dtype="float32"))

      # Copy the input into the right-hand part of the zero-padded buffer.
      for i in range(0, 7, 1):
        z[0, 4 + i, 0].assign(inputs[0, i, 0])

      dim_k = 3      # kernel size
      dilation = 2   # dilation used by the hand-written convolution
      dim = 5        # window length: dilation * (dim_k - 1) + 1
      size = 7       # output length

      # Dilated 1D convolution computed element by element via assign().
      for i in range(0, size, 1):
        for m in range(0, dim, 1):
          copy[0, m, 0].assign(z[0, m + i, 0])

        for j in range(0, dim_k, 1):
          outputs[0, i, 0].assign(
              outputs[0, i, 0] + copy[0, j * dilation, 0] * self.kernel[j, 0, 0])

    if self.use_bias:
      output_rank = outputs.shape.rank
      if self.rank == 1 and self._channels_first:
        # nn.bias_add does not accept a 1D input tensor.
        bias = array_ops.reshape(self.bias, (1, self.filters, 1))
        outputs += bias
      else:
        # Handle multiple batch dimensions.
        if output_rank is not None and output_rank > 2 + self.rank:

          def _apply_fn(o):
            return nn.bias_add(o, self.bias, data_format=self._tf_data_format)

          outputs = nn_ops.squeeze_batch_dims(
              outputs, _apply_fn, inner_rank=self.rank + 1)
        else:
          outputs = nn.bias_add(
              outputs, self.bias, data_format=self._tf_data_format)


    if not context.executing_eagerly():
       # Infer the static output shape:
       out_shape = self.compute_output_shape(input_shape)
       outputs.set_shape(out_shape)

    if self.activation is not None:
      return self.activation(outputs)
    
    return outputs

  def _spatial_output_shape(self, spatial_input_shape):
    return [
        conv_utils.conv_output_length(
            length,
            self.kernel_size[i],
            padding=self.padding,
            stride=self.strides[i],
            dilation=self.dilation_rate[i])
        for i, length in enumerate(spatial_input_shape)
    ]

  def compute_output_shape(self, input_shape):
    input_shape = tensor_shape.TensorShape(input_shape).as_list()
    batch_rank = len(input_shape) - self.rank - 1
    if self.data_format == 'channels_last':
      return tensor_shape.TensorShape(
          input_shape[:batch_rank]
          + self._spatial_output_shape(input_shape[batch_rank:-1])
          + [self.filters])
    else:
      return tensor_shape.TensorShape(
          input_shape[:batch_rank] + [self.filters] +
          self._spatial_output_shape(input_shape[batch_rank + 1:]))

  def _recreate_conv_op(self, inputs):  # pylint: disable=unused-argument
    return False

  def get_config(self):
    config = {
        'filters':
            self.filters,
        'kernel_size':
            self.kernel_size,
        'strides':
            self.strides,
        'padding':
            self.padding,
        'data_format':
            self.data_format,
        'dilation_rate':
            self.dilation_rate,
        'groups':
            self.groups,
        'activation':
            activations.serialize(self.activation),
        'use_bias':
            self.use_bias,
        'kernel_initializer':
            initializers.serialize(self.kernel_initializer),
        'bias_initializer':
            initializers.serialize(self.bias_initializer),
        'kernel_regularizer':
            regularizers.serialize(self.kernel_regularizer),
        'bias_regularizer':
            regularizers.serialize(self.bias_regularizer),
        'activity_regularizer':
            regularizers.serialize(self.activity_regularizer),
        'kernel_constraint':
            constraints.serialize(self.kernel_constraint),
        'bias_constraint':
            constraints.serialize(self.bias_constraint)
    }
    base_config = super(Myconv, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def _compute_causal_padding(self, inputs):
    """Calculates padding for 'causal' option for 1-d conv layers."""
    left_pad = self.dilation_rate[0] * (self.kernel_size[0] - 1)
    if getattr(inputs.shape, 'ndims', None) is None:
      batch_rank = 1
    else:
      batch_rank = len(inputs.shape) - 2
    if self.data_format == 'channels_last':
      causal_padding = [[0, 0]] * batch_rank + [[left_pad, 0], [0, 0]]
    else:
      causal_padding = [[0, 0]] * batch_rank + [[0, 0], [left_pad, 0]]
    return causal_padding

  def _get_channel_axis(self):
    if self.data_format == 'channels_first':
      return -1 - self.rank
    else:
      return -1

  def _get_input_channel(self, input_shape):
    channel_axis = self._get_channel_axis()
    if input_shape.dims[channel_axis].value is None:
      raise ValueError('The channel dimension of the inputs '
                       'should be defined. Found `None`.')
    return int(input_shape[channel_axis])

  def _get_padding_op(self):
    if self.padding == 'causal':
      op_padding = 'valid'
    else:
      op_padding = self.padding
    if not isinstance(op_padding, (list, tuple)):
      op_padding = op_padding.upper()
    return op_padding


@keras_export('keras.layers.Myconv1D', 'keras.layers.Myconvol1D')
class Myconv1D(Myconv):
  

  def __init__(self,
               filters,
               kernel_size,
               strides=1,
               padding='valid',
               data_format='channels_last',
               dilation_rate=1,
               groups=1,
               activation=None,
               use_bias=True,
               kernel_initializer='glorot_uniform',
               bias_initializer='zeros',
               kernel_regularizer=None,
               bias_regularizer=None,
               activity_regularizer=None,
               kernel_constraint=None,
               bias_constraint=None,
               **kwargs):
    super(Myconv1D, self).__init__(
        rank=1,
        filters=filters,
        kernel_size=kernel_size,
        strides=strides,
        padding=padding,
        data_format=data_format,
        dilation_rate=dilation_rate,
        groups=groups,
        activation=activations.get(activation),
        use_bias=use_bias,
        kernel_initializer=initializers.get(kernel_initializer),
        bias_initializer=initializers.get(bias_initializer),
        kernel_regularizer=regularizers.get(kernel_regularizer),
        bias_regularizer=regularizers.get(bias_regularizer),
        activity_regularizer=regularizers.get(activity_regularizer),
        kernel_constraint=constraints.get(kernel_constraint),
        bias_constraint=constraints.get(bias_constraint),
        **kwargs)

# train the model
def build_model(x_train, y_train):
  train1 = x_train.shape[1]
  train2 = x_train.shape[2]
  verbose, epochs, batch_size = 1, 10, 1
  # define model
  inp = Input(shape=(train1, train2), batch_size=1)
  r = Myconv1D(filters=1, kernel_size=3, padding='causal', dilation_rate=1,
               use_bias=False, name="MyConv")(inp)
  r = Flatten()(r)
  r = Dense(10, activation='relu')(r)
  r = Dense(7)(r)
  model = Model(inputs=inp, outputs=r)
  model.summary()
  model.compile(loss='mse', optimizer='adam', metrics=['accuracy'])
  # fit network
  model.fit(x_train, y_train, epochs=epochs, batch_size=batch_size, verbose=verbose)
  return model

x_train = tf.constant([
    [[0],
     [1],
     [2],
     [3],
     [4],
     [5],
     [6]],
    [[1],
     [2],
     [3],
     [4],
     [5],
     [6],
     [7]],])
   


y_train = tf.constant(
    [[7, 8, 9, 10, 11, 12, 13],
    [8, 9, 10, 11, 12, 13, 14]])


model = build_model(x_train, y_train)

tflite_model_name = 'tcn'
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]
tflite_model = converter.convert()
open(tflite_model_name + '.tflite', 'wb').write(tflite_model)
 

I am doing it this way because later I may be able to convert the model to TFLite without Conv2D and ExpandDims ops (if I try to convert a model that uses the standard Conv1D, that layer gets replaced by those operations).
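To check whether the hand-written layer actually avoids that lowering, one option is to inspect the operators of the converted flatbuffer. A minimal sketch, assuming a recent TF 2.x release that provides tf.lite.experimental.Analyzer, which converts a reference model built with the standard Conv1D so its op list can be compared against the custom layer's:

import tensorflow as tf

# Reference model using the standard Conv1D, for comparison with the custom layer.
inp = tf.keras.Input(shape=(7, 1), batch_size=1)
out = tf.keras.layers.Conv1D(filters=1, kernel_size=3, padding='causal', use_bias=False)(inp)
ref_model = tf.keras.Model(inp, out)

converter = tf.lite.TFLiteConverter.from_keras_model(ref_model)
ref_tflite = converter.convert()

# Prints the operator list of the converted model; with the standard Conv1D this
# typically includes EXPAND_DIMS / CONV_2D, which is what the custom layer tries to avoid.
tf.lite.experimental.Analyzer.analyze(model_content=ref_tflite)

Running the same analyze() call on the tflite_model produced above shows which ops the custom layer is converted to.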

Tags: python, tensorflow, keras, tensorflow-lite

Solution

