debugging - 此场景 GCNN 模型不存在警告梯度
问题描述
我刚刚尝试使用 TF2.0 框架从本文中实现我自己的 Scene GCNN 版本(之前仅在 Pytorch 中的经验)`:https ://arxiv.org/pdf/2103.06422.pdf 1
我运行的,我得到的警告是:
WARNING:tensorflow:Gradients do not exist for variables [‘scene_gcnn/scene_gcn_conv_2/weight_rs/kernel:0’, ‘scene_gcnn/scene_gcn_conv_2/weight_rs/bias:0’, ‘scene_gcnn/scene_gcn_conv_2/weight_rd/kernel:0’, ‘scene_gcnn/scene_gcn_conv_2/weight_rd/bias:0’] when minimizing the loss.
这是我的代码如下:
import tensorflow as tf
from tensorflow.keras import activations, regularizers, constraints, initializers
import numpy as np
dot = tf.matmul
spdot = tf.sparse.sparse_dense_matmul
class Scene_GCNConv(tf.keras.layers.Layer):
def __init__(self,
activation=lambda x: x,
use_bias=True,
kernel_initializer='glorot_uniform',
kernel_regularizer=None,
kernel_constraint=None,
bias_initializer='ones',
bias_regularizer=None,
bias_constraint=None,
activity_regularizer=None,
weight_shape=None,
**kwargs):
self.activation = activations.get(activation)
self.use_bias = use_bias
self.kernel_initializer = initializers.get(kernel_initializer)
self.bias_initializer = initializers.get(bias_initializer)
self.kernel_regularizer = regularizers.get(kernel_regularizer)
self.bias_regularizer = regularizers.get(bias_regularizer)
self.activity_regularizer = regularizers.get(activity_regularizer)
self.kernel_constraint = constraints.get(kernel_constraint)
self.bias_constraint = constraints.get(bias_constraint)
self.weight_shape = weight_shape
self.weight_sd = tf.keras.layers.Dense(self.weight_shape[1], activation=None, use_bias=self.use_bias, name="weight_sd")
self.weight_sr = tf.keras.layers.Dense(self.weight_shape[1], activation=None, use_bias=self.use_bias, name="weight_sr")
self.weight_dr = tf.keras.layers.Dense(self.weight_shape[1], activation=None, use_bias=self.use_bias, name="weight_dr")
self.weight_rs = tf.keras.layers.Dense(self.weight_shape[1], activation=None, use_bias=self.use_bias, name="weight_rs")
self.weight_rd = tf.keras.layers.Dense(self.weight_shape[1], activation=None, use_bias=self.use_bias, name="weight_rd")
super(Scene_GCNConv, self).__init__()
def call(self, z_o_prev, z_r_prev):
# TODO : switched without z_o and z_r, and included all ones adjacent matrix in initialization
# adjacent_matrix = 1 - tf.eye(z_o_prev.shape[1]) #which is N + 1
dim = z_o_prev.shape[1]
adjacent_matrix = 1 - tf.eye(dim) #which is N + 1
z_o = self.update_object_nodes(z_o_prev, z_r_prev, adjacent_matrix)
z_r = self.update_relationship_nodes(z_o_prev, z_r_prev, adjacent_matrix)
output = [z_o, z_r]
return output
def update_object_nodes(self, object_nodes, relationship_nodes, adjacent_matrix):
z_o = object_nodes
z_r = relationship_nodes
dim = adjacent_matrix.shape
adjacent_matrix_r_compatible = tf.concat([adjacent_matrix, tf.ones([(dim[0]-1)*dim[0], dim[1]])], axis=0)
first_term = self.weight_sd(z_o)
second_term = dot(adjacent_matrix_r_compatible,self.weight_sr(z_r), transpose_a = True)
third_term = dot(adjacent_matrix_r_compatible, self.weight_dr(z_r), transpose_a = True)
z_o = self.activation(first_term + second_term + third_term)
return z_o
def update_relationship_nodes(self, object_nodes, relationship_nodes, adjacent_matrix):
z_o = object_nodes
z_r = relationship_nodes
dim = adjacent_matrix.shape
adjacent_matrix_o_compatible = tf.concat([adjacent_matrix, tf.ones([dim[0], (dim[1]-1)*dim[1]])], axis=1)
first_term = dot(adjacent_matrix_o_compatible, self.weight_rs(z_o), transpose_a = True)
second_term = dot(adjacent_matrix_o_compatible, self.weight_rd(z_o),transpose_a = True)
z_r = self.activation(first_term + second_term)
return z_r
## separate embedding transformation that should be inside a overall Scene Graph Conv Net
class Scene_GCNN(tf.keras.layers.Layer):
def __init__(self,
activation=lambda x: x,
use_bias=True,
kernel_initializer='glorot_uniform',
kernel_regularizer=None,
kernel_constraint=None,
bias_initializer='ones',
bias_regularizer=None,
bias_constraint=None,
activity_regularizer=None,
weight_shape_array=None,
**kwargs):
super(Scene_GCNN, self).__init__()
self.activation = activations.get(activation)
self.use_bias = use_bias
self.kernel_initializer = initializers.get(kernel_initializer)
self.bias_initializer = initializers.get(bias_initializer)
self.kernel_regularizer = regularizers.get(kernel_regularizer)
self.bias_regularizer = regularizers.get(bias_regularizer)
self.activity_regularizer = regularizers.get(activity_regularizer)
self.kernel_constraint = constraints.get(kernel_constraint)
self.bias_constraint = constraints.get(bias_constraint)
## Initialize number of layers
self.weight_shape_array = weight_shape_array
self.num_iterations = len(weight_shape_array)
self.sgcnn_layers = []
for i, weight_shape in enumerate(self.weight_shape_array):
self.sgcnn_layers.append(Scene_GCNConv(
activation=self.activation,
use_bias=self.use_bias,
kernel_initializer=self.kernel_initializer,
kernel_regularizer=self.kernel_regularizer,
kernel_constraint=self.kernel_constraint,
bias_initializer=self.bias_initializer,
bias_regularizer=self.bias_regularizer,
bias_constraint=self.bias_constraint,
activity_regularizer=self.activity_regularizer,
weight_shape=weight_shape))
d = self.weight_shape_array[0][0]
embed_relationship = tf.keras.models.Sequential()
embed_relationship.add(tf.keras.Input(shape=(6,)))
embed_relationship.add(tf.keras.layers.Dense(d, activation='relu'))
embed_relationship.add(tf.keras.layers.Dense(d, activation=None))
self.embed_relationship = embed_relationship
embed_background = tf.keras.models.Sequential()
embed_background.add(tf.keras.Input(shape=(3,)))
embed_background.add(tf.keras.layers.Dense(d, activation='relu'))
embed_background.add(tf.keras.layers.Dense(d, activation=None))
self.embed_background = embed_background
embed_slots = tf.keras.models.Sequential()
embed_slots.add(tf.keras.Input(shape=(21,)))
embed_slots.add(tf.keras.layers.Dense(d, activation='relu'))
embed_slots.add(tf.keras.layers.Dense(d, activation=None))
self.embed_slots = embed_slots
final_embed_background = tf.keras.models.Sequential()
final_embed_background.add(tf.keras.Input(shape=(21,)))
final_embed_background.add(tf.keras.layers.Dense(3, activation=tf.keras.layers.LeakyReLU(alpha=0.01)))
self.final_embed_background = final_embed_background
# def call(self, slots, background_latent,):
def call(self, inputs):
slots = inputs[0]
background_latent = inputs[1]
#slots [B, num_obj, 21]
#background_latent [B, 1, 3]
background_latent = background_latent[:,None,:]
object_nodes = self.get_object_nodes(slots, background_latent)
relationship_nodes = self.get_relationship_nodes(slots)
for i in range(self.num_iterations):
object_nodes, relationship_nodes = self.sgcnn_layers[i](object_nodes, relationship_nodes)
#object_nodes [B, num_object + 1, 21]
slots = object_nodes[:,0:-1,:]
background_latent = self.final_embed_background(object_nodes[:,-1,:])
# output = [object_nodes, relationship_nodes]
output = [slots, background_latent]
return output
def get_object_nodes(self, slots=None, background_latent = None):
#Embedding of slot
slots_embedded = self.embed_slots(slots)
#Embedding of background
background_latent_embedded = self.embed_background(background_latent)
object_nodes = tf.concat([slots_embedded, background_latent_embedded], axis = 1)
return object_nodes
def get_relationship_nodes(self, slots):
#Relationship nodes, between background and slots
# For nodes connecting two different objects, the geometry feature [20, 49] of 2D object bounding
# boxes and the box corner coordinates of both connected objects normalized by the image height and width are used as
# features.
# In our example, we use x,y,z as values from each slot to get (N+1)^2 x 2d matrix where d=(x,y,z)
# The coordinates are flattened and concatenated in
# the order of source-destination, which differentiate the relationships of different directions.
# For nodes connecting
# objects and layouts, since the relationship is presumably
# different from object-object relationship, we initialize the
# representations with constant values, leaving the job of inferring reasonable relationship representation to SGCN
slots_extended = tf.concat([slots[:,:,18:21], tf.ones([slots.shape[0], 1, 3])],axis=1)
A = tf.repeat(slots_extended, axis = 1,repeats=slots_extended.shape[1])
#Add [B,1,latent_size] to both A and B to include layout
B = tf.tile(slots_extended, multiples=[1,slots_extended.shape[1],1])
relationship_nodes = tf.concat([A,B], axis=2)
relationship_latent_embedded = self.embed_relationship(relationship_nodes)
relationship_nodes = relationship_latent_embedded
return relationship_nodes
if(__name__ == "__main__"):
weight_shape_array=[(64,128),(128,64),(64,21)]
scene_gcnn = Scene_GCNN(
activation='sigmoid',
use_bias=True,
kernel_initializer='glorot_uniform',
kernel_regularizer=None,
kernel_constraint=None,
bias_initializer='glorot_normal',
bias_regularizer=None,
bias_constraint=None,
activity_regularizer=None,
weight_shape_array=weight_shape_array)
slots = tf.random.uniform([8,3,21])
background_latent = tf.random.uniform([8,3])
print(background_latent)
print(scene_gcnn([slots, background_latent]))
#Made up learning rate
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-7)
is_training = True
with tf.GradientTape() as tape:
output = scene_gcnn([slots, background_latent])
#Made up losses, don't think about the metric
losses_background = tf.reduce_sum(tf.random.uniform([8,3]) - output[1])
losses_foreground = tf.reduce_sum(tf.random.uniform([8,3,21]) - output[0])
losses = losses_background + losses_foreground
if is_training:
variables = scene_gcnn.trainable_variables
gradients = tape.gradient(losses, variables)
optimizer.apply_gradients(zip(gradients, variables))
如何避免和解决此警告?我知道这是一个很长的代码示例,但我包含了虚拟测试示例,如果您在其上调用 python 命令(需要 TF2.0),您可以运行该示例。我可以假设我的参数没有被学习,但我看不出这是因为什么原因发生的?我尝试调试这个东西,看看我的漏洞在哪里,理解它一直在努力,但到目前为止还没有结果。
解决方案
推荐阅读
- html - 我怎样才能让卡片在彼此的右侧和左侧堆叠?
- python-3.x - 仅在 Python 中使用 if-else 将整数(从 0 到 999)转换为英文单词时遇到问题
- node.js - 猫鼬没有将我的新文档保存在数组中
- swift - 从所选国家代码返回模板字符串
- azure - 使用 get 和 body 参数调用嵌套的 Azure 逻辑应用
- ansible - 处理 Ansible 的 import_tasks/import_role 和“通知”的最佳方法是什么?
- python - 如何在 Python 中编写井字游戏玩家输入函数?
- activemq - Activemq 代理挂起,hikariCP 和自动提交设置为 false
- python - 我试图在 Python 中的 Number 查找中键入一些数字,但它没有得到任何数字
- python-3.x - 部署无服务器应用程序时 aws-sam-cli 和 jmespath 版本不兼容