首页 > 解决方案 > 此场景 GCNN 模型不存在警告梯度

问题描述

我刚刚尝试使用 TF2.0 框架从本文中实现我自己的 Scene GCNN 版本(之前仅在 Pytorch 中的经验)`:https ://arxiv.org/pdf/2103.06422.pdf 1

我运行的,我得到的警告是:

WARNING:tensorflow:Gradients do not exist for variables [‘scene_gcnn/scene_gcn_conv_2/weight_rs/kernel:0’, ‘scene_gcnn/scene_gcn_conv_2/weight_rs/bias:0’, ‘scene_gcnn/scene_gcn_conv_2/weight_rd/kernel:0’, ‘scene_gcnn/scene_gcn_conv_2/weight_rd/bias:0’] when minimizing the loss.

这是我的代码如下:

import tensorflow as tf
from tensorflow.keras import activations, regularizers, constraints, initializers
import numpy as np 

dot = tf.matmul
spdot = tf.sparse.sparse_dense_matmul




class Scene_GCNConv(tf.keras.layers.Layer):

    def __init__(self,
                 activation=lambda x: x,
                 use_bias=True,
                 kernel_initializer='glorot_uniform',
                 kernel_regularizer=None,
                 kernel_constraint=None,
                 bias_initializer='ones',
                 bias_regularizer=None,
                 bias_constraint=None,
                 activity_regularizer=None,
                 weight_shape=None,
                 **kwargs):

        self.activation = activations.get(activation)
        self.use_bias = use_bias
        self.kernel_initializer = initializers.get(kernel_initializer)
        self.bias_initializer = initializers.get(bias_initializer)
        self.kernel_regularizer = regularizers.get(kernel_regularizer)
        self.bias_regularizer = regularizers.get(bias_regularizer)
        self.activity_regularizer = regularizers.get(activity_regularizer)
        self.kernel_constraint = constraints.get(kernel_constraint)
        self.bias_constraint = constraints.get(bias_constraint)



        self.weight_shape = weight_shape

 
        self.weight_sd = tf.keras.layers.Dense(self.weight_shape[1], activation=None, use_bias=self.use_bias, name="weight_sd")
        self.weight_sr = tf.keras.layers.Dense(self.weight_shape[1], activation=None, use_bias=self.use_bias, name="weight_sr")
        self.weight_dr = tf.keras.layers.Dense(self.weight_shape[1], activation=None, use_bias=self.use_bias, name="weight_dr")
        self.weight_rs = tf.keras.layers.Dense(self.weight_shape[1], activation=None, use_bias=self.use_bias, name="weight_rs")
        self.weight_rd = tf.keras.layers.Dense(self.weight_shape[1], activation=None, use_bias=self.use_bias, name="weight_rd")




        super(Scene_GCNConv, self).__init__()

        

    def call(self, z_o_prev, z_r_prev):

        # TODO : switched without z_o and z_r, and included all ones adjacent matrix in initialization
        # adjacent_matrix = 1 - tf.eye(z_o_prev.shape[1]) #which is N + 1

        dim = z_o_prev.shape[1]
        adjacent_matrix = 1 - tf.eye(dim) #which is N + 1

        z_o = self.update_object_nodes(z_o_prev, z_r_prev, adjacent_matrix)
        z_r = self.update_relationship_nodes(z_o_prev, z_r_prev, adjacent_matrix)
        
        output = [z_o, z_r]
                            
        return output
    
    def update_object_nodes(self, object_nodes, relationship_nodes, adjacent_matrix):
        z_o = object_nodes
        z_r = relationship_nodes
        
        dim = adjacent_matrix.shape
        adjacent_matrix_r_compatible = tf.concat([adjacent_matrix, tf.ones([(dim[0]-1)*dim[0], dim[1]])], axis=0)


        first_term = self.weight_sd(z_o)
        second_term = dot(adjacent_matrix_r_compatible,self.weight_sr(z_r), transpose_a = True)
        third_term = dot(adjacent_matrix_r_compatible, self.weight_dr(z_r), transpose_a = True)
        

        z_o = self.activation(first_term + second_term + third_term)


        
        return z_o
    
    def update_relationship_nodes(self, object_nodes, relationship_nodes, adjacent_matrix):
        z_o = object_nodes
        z_r = relationship_nodes
       
        dim = adjacent_matrix.shape
        adjacent_matrix_o_compatible = tf.concat([adjacent_matrix, tf.ones([dim[0], (dim[1]-1)*dim[1]])], axis=1)

        first_term = dot(adjacent_matrix_o_compatible, self.weight_rs(z_o), transpose_a = True)
        second_term = dot(adjacent_matrix_o_compatible, self.weight_rd(z_o),transpose_a = True)
      
        z_r = self.activation(first_term + second_term)
        
        return z_r
        
        


## separate embedding transformation that should be inside a overall Scene Graph Conv Net      


class Scene_GCNN(tf.keras.layers.Layer):

    def __init__(self,
                 activation=lambda x: x,
                 use_bias=True,
                 kernel_initializer='glorot_uniform',
                 kernel_regularizer=None,
                 kernel_constraint=None,
                 bias_initializer='ones',
                 bias_regularizer=None,
                 bias_constraint=None,
                 activity_regularizer=None,
                 weight_shape_array=None,
                 **kwargs):

        super(Scene_GCNN, self).__init__()

        self.activation = activations.get(activation)
        self.use_bias = use_bias
        self.kernel_initializer = initializers.get(kernel_initializer)
        self.bias_initializer = initializers.get(bias_initializer)
        self.kernel_regularizer = regularizers.get(kernel_regularizer)
        self.bias_regularizer = regularizers.get(bias_regularizer)
        self.activity_regularizer = regularizers.get(activity_regularizer)
        self.kernel_constraint = constraints.get(kernel_constraint)
        self.bias_constraint = constraints.get(bias_constraint)        
        ## Initialize number of layers

        self.weight_shape_array = weight_shape_array    

        self.num_iterations = len(weight_shape_array)


        self.sgcnn_layers = []


        for i, weight_shape in enumerate(self.weight_shape_array):
            self.sgcnn_layers.append(Scene_GCNConv(
                activation=self.activation,
                use_bias=self.use_bias,
                kernel_initializer=self.kernel_initializer,
                kernel_regularizer=self.kernel_regularizer,
                kernel_constraint=self.kernel_constraint,
                bias_initializer=self.bias_initializer,
                bias_regularizer=self.bias_regularizer,
                bias_constraint=self.bias_constraint,
                activity_regularizer=self.activity_regularizer,
                weight_shape=weight_shape))

        d = self.weight_shape_array[0][0]

        embed_relationship = tf.keras.models.Sequential()
        embed_relationship.add(tf.keras.Input(shape=(6,)))
        embed_relationship.add(tf.keras.layers.Dense(d, activation='relu'))
        embed_relationship.add(tf.keras.layers.Dense(d, activation=None))
        
        self.embed_relationship = embed_relationship
        
        embed_background = tf.keras.models.Sequential()
        embed_background.add(tf.keras.Input(shape=(3,)))
        embed_background.add(tf.keras.layers.Dense(d, activation='relu'))
        embed_background.add(tf.keras.layers.Dense(d, activation=None))
        
        self.embed_background = embed_background
        
        
        embed_slots = tf.keras.models.Sequential()
        embed_slots.add(tf.keras.Input(shape=(21,)))
        embed_slots.add(tf.keras.layers.Dense(d, activation='relu'))
        embed_slots.add(tf.keras.layers.Dense(d, activation=None))
        
        self.embed_slots = embed_slots


        final_embed_background = tf.keras.models.Sequential()
        final_embed_background.add(tf.keras.Input(shape=(21,)))
        final_embed_background.add(tf.keras.layers.Dense(3, activation=tf.keras.layers.LeakyReLU(alpha=0.01)))
        
        self.final_embed_background = final_embed_background

    # def call(self, slots, background_latent,):
    def call(self, inputs):

        slots = inputs[0]
        background_latent = inputs[1]
        #slots [B, num_obj, 21]
        #background_latent [B, 1, 3]
        background_latent = background_latent[:,None,:]

        object_nodes = self.get_object_nodes(slots, background_latent)
        relationship_nodes = self.get_relationship_nodes(slots)

        for i in range(self.num_iterations):
            object_nodes, relationship_nodes = self.sgcnn_layers[i](object_nodes, relationship_nodes)

        #object_nodes [B, num_object + 1, 21]
        slots = object_nodes[:,0:-1,:]
        background_latent = self.final_embed_background(object_nodes[:,-1,:])
        # output = [object_nodes, relationship_nodes]
        output = [slots, background_latent]

        return output
    

    def get_object_nodes(self, slots=None, background_latent = None):
        #Embedding of slot
        slots_embedded = self.embed_slots(slots)

        #Embedding of background
        background_latent_embedded = self.embed_background(background_latent)

        object_nodes = tf.concat([slots_embedded, background_latent_embedded], axis = 1) 
        return object_nodes
    
    def get_relationship_nodes(self, slots):
        #Relationship nodes, between background and slots

        #  For nodes connecting two different objects, the geometry feature [20, 49] of 2D object bounding
        # boxes and the box corner coordinates of both connected objects normalized by the image height and width are used as
        # features.
        # In our example, we use x,y,z as values from each slot to get (N+1)^2 x 2d matrix where d=(x,y,z)

        #  The coordinates are flattened and concatenated in
        # the order of source-destination, which differentiate the relationships of different directions.

        # For nodes connecting
        # objects and layouts, since the relationship is presumably
        # different from object-object relationship, we initialize the
        # representations with constant values, leaving the job of inferring reasonable relationship representation to SGCN

        slots_extended = tf.concat([slots[:,:,18:21], tf.ones([slots.shape[0], 1, 3])],axis=1)
        A = tf.repeat(slots_extended, axis = 1,repeats=slots_extended.shape[1])
        #Add [B,1,latent_size] to both A and B to include layout
        B = tf.tile(slots_extended, multiples=[1,slots_extended.shape[1],1])


        relationship_nodes = tf.concat([A,B], axis=2)



        relationship_latent_embedded = self.embed_relationship(relationship_nodes)

        relationship_nodes = relationship_latent_embedded

        return relationship_nodes



if(__name__ == "__main__"):
    weight_shape_array=[(64,128),(128,64),(64,21)]

    scene_gcnn = Scene_GCNN(
            activation='sigmoid',
            use_bias=True,
            kernel_initializer='glorot_uniform',
            kernel_regularizer=None,
            kernel_constraint=None,
            bias_initializer='glorot_normal',
            bias_regularizer=None,
            bias_constraint=None,
            activity_regularizer=None,
            weight_shape_array=weight_shape_array)


    slots = tf.random.uniform([8,3,21])
    background_latent = tf.random.uniform([8,3])
    print(background_latent)
    print(scene_gcnn([slots, background_latent]))

    #Made up learning rate
    optimizer = tf.keras.optimizers.Adam(learning_rate=1e-7)

    is_training = True
    with tf.GradientTape() as tape:

        output = scene_gcnn([slots, background_latent])

        #Made up losses, don't think about the metric
        losses_background = tf.reduce_sum(tf.random.uniform([8,3]) - output[1])
        losses_foreground = tf.reduce_sum(tf.random.uniform([8,3,21]) - output[0])
        losses = losses_background + losses_foreground

    if is_training:
        variables = scene_gcnn.trainable_variables
        gradients = tape.gradient(losses, variables)
        optimizer.apply_gradients(zip(gradients, variables))


如何避免和解决此警告?我知道这是一个很长的代码示例,但我包含了虚拟测试示例,如果您在其上调用 python 命令(需要 TF2.0),您可以运行该示例。我可以假设我的参数没有被学习,但我看不出这是因为什么原因发生的?我尝试调试这个东西,看看我的漏洞在哪里,理解它一直在努力,但到目前为止还没有结果。

标签: debuggingconv-neural-networktensorflow2.0

解决方案


推荐阅读