python - 使用预训练模型在张量流中训练新模型
问题描述
我正在创建一个 CNN 自动编码器来充当特征提取器,然后是张量流中的简单 MLP 分类器。我是单独进行训练的,所以我首先训练自动编码器将数据编码到较低维度的特征空间中,然后通过将输入传递给经过训练的自动编码器,然后通过 MLP 来分别训练 MLP 分类器。
我目前在连接这两个模型时遇到问题。我的方法是加载旧图,并获取输出和原始输入张量的占位符。然后,我还在原始图的最后一层创建了一个停止梯度,以便我只训练 MLP 而不是自动编码器。然后我使用变量范围仅初始化新图的变量。
运行代码时出现多个错误,从未初始化的变量到太多。有一个更好的方法吗?我将包括下面的代码。
工作自动编码器训练的代码
import tensorflow as tf
import numpy as np
import math
def lrelu(x, leak=0.2, name="lrelu"):
"""Leaky rectifier.
Parameters
----------
x : Tensor
The tensor to apply the nonlinearity to.
leak : float, optional
Leakage parameter.
name : str, optional
Variable scope to use.
Returns
-------
x : Tensor
Output of the nonlinearity.
"""
with tf.variable_scope(name):
f1 = 0.5 * (1 + leak)
f2 = 0.5 * (1 - leak)
return f1 * x + f2 * abs(x)
def corrupt(x):
"""Take an input tensor and add uniform masking.
Parameters
----------
x : Tensor/Placeholder
Input to corrupt.
Returns
-------
x_corrupted : Tensor
50 pct of values corrupted.
"""
return tf.multiply(x, tf.cast(tf.random_uniform(shape=tf.shape(x),
minval=0,
maxval=2,
dtype=tf.int32), tf.float32))
def autoencoder(input_shape = [None, 784],
n_filters = [1, 10, 10, 10],
filter_sizes = [3, 3, 3, 3],
corruption = False):
"""Build a deep denoising autoencoder w/ tied weights.
Parameters
----------
input_shape : list, optional
Description
n_filters : list, optional
Description
filter_sizes : list, optional
Description
Returns
-------
x : Tensor
Input placeholder to the network
z : Tensor
Inner-most latent representation
y : Tensor
Output reconstruction of the input
cost : Tensor
Overall cost to use for training
Raises
------
ValueError
Description
"""
# Input to network
x = tf.placeholder(tf.float32, input_shape, name = 'x')
print(x)
# Convert 2D input is converted to square
if len(x.get_shape()) == 2:
x_dim = np.sqrt(x.get_shape().as_list()[1])
if x_dim != int(x_dim):
raise ValueError('Unsupported Input Dimensions')
x_dim = int(x_dim)
x_tensor = tf.reshape(x, [-1, x_dim, x_dim, n_filters[0]])
elif len(x.get_shape()) == 4:
x_tensor = x
else:
raise ValueError('Unsupported Input Dimensions')
current_input = x_tensor
# Optionally apply denoising autoencoder
if corruption:
current_input = corrupt(current_input)
# Encoder
encoder = []
shapes = []
for layer_i, n_output in enumerate(n_filters[1:]):
n_input = current_input.get_shape().as_list()[3] # This will be # Channels
shapes.append(current_input.get_shape().as_list())
W = tf.Variable(
tf.random_uniform([
filter_sizes[layer_i],
filter_sizes[layer_i],
n_input, n_output],
-1.0 / math.sqrt(n_input),
1.0/math.sqrt(n_input))) # This is so we don't have to initialize ourselves
b = tf.Variable(tf.zeros([n_output]))
encoder.append(W)
output = lrelu(
tf.add(tf.nn.conv2d(
current_input, W, strides = [1,2,2,1], padding = 'SAME'), b))
current_input = output
print(W)
print(b)
print(output)
# Store the latent representation
z = current_input
print(z)
encoder.reverse()
shapes.reverse()
for layer_i, shape in enumerate(shapes):
W = encoder[layer_i]
b = tf.Variable(tf.zeros([W.get_shape().as_list()[2]]))
output = lrelu(tf.add(
tf.nn.conv2d_transpose(
current_input, W,
tf.stack([tf.shape(x)[0], shape[1], shape[2], shape[3]]),
strides = [1,2,2,1], padding = 'SAME'), b))
current_input = output
# Now we have a reconstruction
y = current_input
cost = tf.reduce_sum(tf.square(y - x_tensor))
return {'x': x, 'z': z, 'y': y, 'cost': cost}
# %%
def test_mnist():
"""Test the convolutional autoencder using MNIST."""
# %%
import tensorflow as tf
import tensorflow.examples.tutorials.mnist.input_data as input_data
import matplotlib.pyplot as plt
# %%
# load MNIST as before
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
mean_img = np.mean(mnist.train.images, axis=0)
ae = autoencoder()
# %%
learning_rate = 0.01
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(ae['cost'])
# Create saver
saver = tf.train.Saver(tf.trainable_variables())
# %%
# We create a session to use the graph
sess = tf.Session()
sess.run(tf.global_variables_initializer())
# %%
# Fit all training data
batch_size = 100
n_epochs = 1
for epoch_i in range(n_epochs):
for batch_i in range(mnist.train.num_examples // batch_size):
batch_xs, _ = mnist.train.next_batch(batch_size)
train = np.array([img - mean_img for img in batch_xs])
sess.run(optimizer, feed_dict={ae['x']: train})
print(epoch_i, sess.run(ae['cost'], feed_dict={ae['x']: train}))
save_path = saver.save(sess, "AutoEncoderCheckpoints/AutoEncoderMNIST.ckpt")
print("Model saved in path: %s" % save_path)
# %%
# Plot example reconstructions
n_examples = 10
test_xs, _ = mnist.test.next_batch(n_examples)
test_xs_norm = np.array([img - mean_img for img in test_xs])
recon, latent = sess.run([ae['y'], ae['z']], feed_dict={ae['x']: test_xs_norm})
print(recon.shape)
print(latent.shape)
fig, axs = plt.subplots(2, n_examples, figsize=(20, 6))
for example_i in range(n_examples):
axs[0][example_i].imshow(
np.reshape(test_xs[example_i, :], (28, 28)))
axs[1][example_i].imshow(
np.reshape(
np.reshape(recon[example_i, ...], (784,)) + mean_img,
(28, 28)))
fig.show()
plt.draw()
# plt.waitforbuttonpress()
new_fig, new_axs = plt.subplots(10, n_examples, figsize = (20,20))
for chan in range(10):
for example_i in range(n_examples):
new_axs[chan][example_i].imshow(
np.reshape(latent[example_i,...,chan],
(4,4)))
new_fig.show()
plt.draw()
# %%
if __name__ == '__main__':
test_mnist()
代码无法在不重新训练自动编码器的情况下训练 MLP
aeMLP_saver = tf.train.import_meta_graph('AutoEncoderCheckpoints/AutoEncoderMNIST.ckpt.meta')
aeMLP_graph = tf.get_default_graph()
weights = {
'h1': tf.Variable(tf.random_normal([160, 320])),
'h2': tf.Variable(tf.random_normal([320, 640])),
'out': tf.Variable(tf.random_normal([640, 10]))
}
biases = {
'b1': tf.Variable(tf.random_normal([320])),
'b2': tf.Variable(tf.random_normal([640])),
'out': tf.Variable(tf.random_normal([10]))
}
# with tf.Graph().as_default():
with tf.variable_scope("model2"):
x_plh = aeMLP_graph.get_tensor_by_name('x:0')
output_conv = aeMLP_graph.get_tensor_by_name('lrelu_2/add:0')
output_conv_sg = tf.stop_gradient(output_conv)
print(output_conv_sg)
output_conv_shape = output_conv_sg.get_shape().as_list()
print(output_conv_shape)
new_input = tf.reshape(output_conv_sg, [-1, 160])
Y = tf.placeholder("float", [None, 10])
# Hidden fully connected layer with 256 neurons
layer_1 = tf.add(tf.matmul(new_input, weights['h1']), biases['b1'])
# Hidden fully connected layer with 256 neurons
layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
# Output fully connected layer with a neuron for each class
out_layer = tf.matmul(layer_2, weights['out']) + biases['out']
print(layer_1)
print(layer_2)
print(out_layer)
y_pred = tf.nn.softmax(out_layer)
correct_prediction = tf.equal(tf.argmax(y_pred,1), tf.argmax(Y,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=out_layer, labels=Y))
learning_rate = 0.001
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss_op)
# out_layer_mlp, y_pred = multilayer_perceptron(new_input)
model_2_variables_list = tf.get_collection(
tf.GraphKeys.GLOBAL_VARIABLES,
scope="model2"
)
print(model_2_variables_list)
init2 = tf.variables_initializer(model_2_variables_list)
import tensorflow as tf
import tensorflow.examples.tutorials.mnist.input_data as input_data
import matplotlib.pyplot as plt
# %%
# load MNIST as before
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
mean_img = np.mean(mnist.train.images, axis=0)
# Create saver
saver_new = tf.train.Saver()
with tf.Session() as sess:
sess.run(init2)
# %%
# Fit all training data
batch_size = 100
n_epochs = 1
for epoch_i in range(n_epochs):
for batch_i in range(mnist.train.num_examples // batch_size):
batch_xs, batch_ys = mnist.train.next_batch(batch_size)
train = np.array([img - mean_img for img in batch_xs])
_,c = sess.run([optimizer, loss_op], feed_dict={x_plh: train, Y: batch_ys})
print(epoch_i, " || ", c)
batch_xt, batch_yt = mnist.test.next_batch(batch_size)
test = train = np.array([img - mean_img for img in batch_xt])
acc = sess.run(accuracy, feed_dict = {x_plh: test, Y: batch_yt})
print("Accuracy is: ", acc)
save_path = saver_new.save(sess, "AutoEncoderCheckpoints/AutoEncoderClassifierMNIST.ckpt")
print("Model saved in path: %s" % save_path)
上面的两个代码都是可运行的,因此您将能够重新创建我遇到的错误。我已经阅读了一些关于可能冻结图表的帖子,但我不确定这是否是最好的解决方案。
解决方案
如果您实际包含您遇到的错误,这篇文章对其他人会更有用。
第一个明显的问题是导入图形tf.train.import_meta_graph
不会初始化变量。有关调用以实际恢复变量值的示例,请参见https://www.tensorflow.org/api_docs/python/tf/train/import_meta_graph 。restore
在高层次上,由于您拥有构建原始训练图的代码,因此可能不需要进行保存/恢复。您可以解决此问题的一种可能方法是构建整个图(AE 和 MLP)。首先训练 AE(通过调用sess.run
AE 的训练操作),然后 stop_gradients 并训练 MLP。您还可以根据towers
需要单独构建共享变量。我建议不进行保存/恢复的原因(除非您有其他用例)是因为依赖张量名称可能很脆弱。
推荐阅读
- reactjs - 无法读取从快递返回的对象的长度(反应)
- vba - Solidworks API - 从文本文件中读取数据
- c# - 发布 .net 时出错 - 带有 connectionString 的核心 3.1 Web 服务
- javascript - ajax 无法在 php 中的下拉列表更改时将数据显示到文本区域
- java - 无法为 JavaFX 的 hbox 添加圆角?如何解决这个问题?
- identityserver4 - IdentityServer4 授权头认证
- javascript - 如何使用 HammerJS 和 CSS 创建 iOS 样式拖动?
- python - numpy.linalg.det() 可以计算错误吗?同一矩阵的两个不同结果
- mongodb - 添加 adminOfAnyDatabase 的身份验证后,Mongo shell 连接失败
- batch-file - 测试完成后如何返回更改的时区?