tensorflow - TensorFlow:整个训练过程中的 CosineDifference ObjFunc 常量
问题描述
以下示例是我正在处理的简化版本。我试图找到一个最小化余弦距离的神经网络。我之所以实现自己的余弦差分损失函数,而不是使用 tensorflow 的内置方法,是因为在我的项目的完整版本中,它不太符合我的要求(尽管在这个简单的版本中它们是等价的。)
我将两个正交向量(A 和 B)输入到网络中。我试图减少(A和B)之间的余弦距离。它通过最小化损失函数来做到这一点(它还包括一个在变换向量B时保留向量B长度的组件。)最终我的输出应该是一个向量与向量 A 的方向相同,长度与向量 B 相同。
我遇到的问题是网络输出“vector_B_transformed”并且这个向量永远不会改变。我建立的损失函数在整个训练过程中也是不变的。我尝试过以不同的方式初始化权重,但这并没有帮助。我从来没有在我的全连接网络的最后一层有 relu 函数,并且在隐藏层上尝试了 relu 激活函数——但这似乎没有什么区别。
我粗略地将结果附加到列表中并将它们打印到终端。时期减少到 200 但增加时同样的问题。
如果有人可以帮助我,将不胜感激,因为我真的被困住了。
import math
import numpy as np
import tensorflow as tf
import tensorflow.contrib.slim as slim
from tensorflow.python.framework import ops
# from utils import *
##### New Helper Functions
# weight and bais wrappers
def weight_variable(name, shape):
"""
Create a weight variable with appropriate initialization
:param name: weight name
:param shape: weight shape
:return: initialized weight variable
"""
initer = tf.truncated_normal_initializer(stddev=0.01)
return tf.get_variable('W_' + name,
dtype=tf.float32,
shape=shape,
initializer=initer)
def bias_variable(name, shape):
"""
Create a bias variable with appropriate initialization
:param name: bias variable name
:param shape: bias variable shape
:return: initialized bias variable
"""
initial = tf.constant(0., shape=shape, dtype=tf.float32)
return tf.get_variable('b_' + name,
dtype=tf.float32,
initializer=initial)
def fc_layer(x, num_units, name, use_relu=True):
"""
Create a fully-connected layer
:param x: input from previous layer
:param num_units: number of hidden units in the fully-connected layer
:param name: layer name
:param use_relu: boolean to add ReLU non-linearity (or not)
:return: The output array
"""
in_dim = x.get_shape()[1]
W = weight_variable(name, shape=[in_dim, num_units])
b = bias_variable(name, [num_units])
layer = tf.matmul(x, W)
layer += b
if use_relu:
layer = tf.nn.relu(layer)
return layer
## loss function
def cosine_distance_simple(A, B):
normalize_A = tf.nn.l2_normalize(A,1)
normalize_B = tf.nn.l2_normalize(B,1)
distance_matrix = 1 - tf.matmul(normalize_A, normalize_B, transpose_b=True)
distance_matrix = tf.diag_part(distance_matrix)
distance = tf.reduce_sum(distance_matrix)
return distance
def maintain_length(A, B):
return (tf.norm(A) - tf.norm(B))
from __future__ import division
import tensorflow as tf
# generator network without residual block
def generator(vector, reuse=False, name="generator"):
with tf.variable_scope(name):
if reuse:
tf.get_variable_scope().reuse_variables()
else:
assert tf.get_variable_scope().reuse is False
output_dimension = vector.shape[1]
e1 = fc_layer(vector, 2, name='g_e1', use_relu=False)
e2 = fc_layer(e1, 4, name='g_e2', use_relu=False)
e3 = fc_layer(e2, 8, name='g_e3', use_relu=False)
e4 = fc_layer(e3, 16, name='g_e4', use_relu=False)
e5 = fc_layer(e4, 16, name='g_e5', use_relu=False)
e6 = fc_layer(e5, 8, name='g_e6', use_relu=False)
e7 = fc_layer(e6, 4, name='g_e7', use_relu=False)
e8 = fc_layer(e7, output_dimension, name='g_e8', use_relu=False)
return e8
from __future__ import division
import os
import time
from glob import glob
import tensorflow as tf
import numpy as np
from collections import namedtuple
from sklearn.model_selection import train_test_split
# from module import *
# from utils import *
class cosine_diff_test(object):
def __init__(self, sess, args):
# initialise tensorflow session
self.sess = sess
# data, test, train splits
self.data_A = args.vA
self.data_B = args.vB
self.generator = generator
# when an instance of class cycleGAN is created, build model is automatically called
self._build_model()
def _build_model(self):
#### INPUTS TO NETWORKS
# placeholder for vectors
self.vector_A = tf.placeholder(tf.float32,
[None, 2],
name='vector_A')
self.vector_B = tf.placeholder(tf.float32,
[None, 2],
name='vector_B')
# FCNN to determine vector move required
self.vector_B_ = self.generator(self.vector_B, False, name="generatorB")
# minimise cos_dist between A and B while keeping A same
self.loss = cosine_distance_simple(self.vector_A, self.vector_B_) \
+ maintain_length(self.vector_B, self.vector_B_)
'''
self.loss = abs_criterion(self.vector_A, self.vector_A_) \
+ abs_criterion(self.vector_B, self.vector_B_)
'''
# trainable variables
t_vars = tf.trainable_variables()
# training variables for generator
self.g_vars = [var for var in t_vars if 'generator' in var.name]
def train(self, args):
# placeholder for learning rate
self.lr = tf.placeholder(tf.float32, None, name='learning_rate')
# define optimizer
self.optim = tf.train.AdamOptimizer(self.lr, beta1=args.beta1).minimize(self.loss, var_list=self.g_vars)
# initialise global varibles and run session
init_op = tf.global_variables_initializer()
self.sess.run(init_op)
lr = args.lr
# Import Data
vecA = self.data_A.copy()
vecB = self.data_B.copy()
results_loss = []
results_vector_B_transformed = []
# iterate over the number of epochs definied
for epoch in range(args.epoch):
# Update
vector_B_transformed, _ = self.sess.run(
[self.vector_B_, self.loss],
feed_dict={self.vector_A: vecA,
self.vector_B: vecB,
self.lr: lr})
results_loss.append(_)
results_vector_B_transformed.append(vector_B_transformed)
print(results_loss)
print(results_vector_B_transformed)
origin = args.orig
print('plotting ...')
plt.xlim((-0.5,1.5));
plt.ylim((-0.5,2.5));
plt.quiver(*origin, vecA, vector_B_transformed,
color=['r','b'],angles='xy', scale_units='xy', scale=1);
class Args():
A_vec = np.array([1, 0]).reshape(1,-1)
B_vec = np.array([0, 2]).reshape(1,-1)
ori = np.array([0, 0]).reshape(1,-1)
epoch = 200
lr = 0.0002
vA = A_vec
vB = B_vec
beta1 = 0.5
orig = ori
args = Args()
# TRAIN
tf.reset_default_graph()
tfconfig = tf.ConfigProto(allow_soft_placement=True)
tfconfig.gpu_options.allow_growth = True
with tf.Session(config=tfconfig) as sess:
model = cosine_diff_test(sess, args)
model.train(args)
解决方案
好的,所以我发现了问题,最后的简单错误:
我的优化器不在我的更新中
vector_B_transformed, _ = self.sess.run(
[self.vector_B_, self.loss],
feed_dict={self.vector_A: vecA,
self.vector_B: vecB,
self.lr: lr})
# Update
vector_B_transformed, _, loss = self.sess.run(
[self.vector_B_, self.optim, self.loss],
feed_dict={self.vector_A: vecA,
self.vector_B: vecB,
self.lr: lr})
我的代码仍然没有按预期工作,但它至少在尝试优化某些东西,以便取得进展!
推荐阅读
- mongodb - MongoDB查询替换NULL值
- typescript - ESLint 不会警告我未使用的变量或未导入的组件(NextJs TypeScript)
- pdf.js - PDF.js,获取大纲的所有内容
- android - 找不到 com.zendesk:support-providers:5.0.2
- python - Jupyter notebook 内核不断需要重启
- c# - 如何在 C# 数组中查找和显示特定值
- c# - 进程 Azure AD B2C 自定义策略用于散列数据
- r - check_rhub 正在尝试安装 BiocManager 失败
- spring-boot - 如何将流量重新路由到 ip 到特定端口以显示网站
- reactjs - Jest Globals 收到“不是函数”错误,没有 Globals 可以正常工作