tensorflow - 使用 Keras 复制结果
问题描述
我正在尝试使用 Tensorflow 和 Keras(带有 TF 后端)复制我的实验结果。当我使用 TF 时,我首先在脚本顶部为 numpy 和 tensorflow 图设置随机种子。我没有使用任何 dropout 层或其他可能引入随机性的方法(我能想到的)。
在运行此类模型时,无论其网络规模如何,总是会产生相同的结果。
TF 实验 1:
('Epoch ', 99, ' completed out of ', 100, ' loss: ', 289.8982433080673, 'accuracy: ', 0.6875)
TF 实验 2:
('Epoch ', 99, ' completed out of ', 100, ' loss: ', 289.8982433080673, 'accuracy: ', 0.6875)
当我尝试使用具有相同配置的 Keras 复制这些结果时,我失败了。最重要的是,每次单独运行都会产生不同的性能。
我的 TF 代码可以复制结果,如下所示(片段参考:https://www.youtube.com/watch?v=BhpvH5DuVu8&list=PLQVvvaa0QuDfKTOs3Keq_kaG2P55YRn5v&index=46):
# Seed NumPy's global RNG and TensorFlow's graph-level RNG at the very top of
# the script, before any other module is imported or any graph op is created.
from numpy.random import seed
seed(1)
from tensorflow import set_random_seed
set_random_seed(2)
## import system modules
#
import os
import sys
## import ML modules
#
import tensorflow as tf
import numpy as np
from keras.utils import to_categorical
from sklearn import preprocessing
# Directory handed to the TensorBoard summary writer in train_neural_network.
logs_path = '../logs/'
## Default constants
#
# Width of the output layer (number of target classes).
NO_OF_CLASSES = 2
# Number of rows sliced from the training set for each mini-batch.
BATCH_SIZE = 32
# Number of input features per sample (second dimension of the input placeholder).
FEAT_DIM = 26
# Number of units in hidden layers 1, 2 and 3 respectively.
N_nodes_hl1 = 300
N_nodes_hl2 = 30
N_nodes_hl3 = 30
## define the network architecture
#
## This model is a simple multilayer perceptron network with 3 hidden layers.
## Input to the layer has the dimensions equal to feature dimensions.
## We create a complete graph in this method with input placeholder as an input argument and
## output placeholder as an returning argument
#
def neural_network_model(data):
    """Build the MLP graph on top of ``data`` and return the output logits.

    The network has three ReLU hidden layers (``N_nodes_hl1``,
    ``N_nodes_hl2``, ``N_nodes_hl3`` units) followed by a linear output
    layer with ``NO_OF_CLASSES`` units. All weights and biases are
    ``tf.Variable`` objects initialised from ``tf.random_normal``.

    Args:
        data: Input tensor that is matrix-multiplied with a
            ``[FEAT_DIM, N_nodes_hl1]`` weight matrix.

    Returns:
        The un-normalised output tensor, named ``"last_layer"`` in the graph.
    """
    layer_sizes = [FEAT_DIM, N_nodes_hl1, N_nodes_hl2, N_nodes_hl3, NO_OF_CLASSES]

    ## Create the parameters of every layer up front, weights before biases,
    ## in layer order. NOTE(review): the creation order of the random ops is
    ## kept as-is because op-level seeds are presumably derived from it when
    ## only a graph-level seed is set -- confirm against the TF docs.
    #
    layers = []
    for idx, (n_in, n_out) in enumerate(zip(layer_sizes[:-1], layer_sizes[1:]), 1):
        weights = tf.Variable(tf.random_normal([n_in, n_out]), name='w%d' % idx)
        biases = tf.Variable(tf.random_normal([n_out]), name='b%d' % idx)
        layers.append({'weights': weights, 'biases': biases})

    ## Hidden layers: affine transform followed by ReLU.
    #
    activation = data
    for layer in layers[:-1]:
        pre_activation = tf.add(tf.matmul(activation, layer['weights']), layer['biases'])
        activation = tf.nn.relu(pre_activation)

    ## Output layer: affine transform only, no non-linearity.
    #
    final = layers[-1]
    return tf.add(tf.matmul(activation, final['weights']), final['biases'], name="last_layer")
## end of method
#
## This method trains a neural network along with collecting statistics related to
## the graphs.
#
def train_neural_network(xtrain, ytrain, odir):
    """Build the MLP training graph and run mini-batch training with Adam.

    Creates the input/label placeholders, the softmax cross-entropy loss, an
    Adam training op and loss/accuracy summaries, then runs ``epoch_iter``
    passes over the training data in a fresh session. One summary record is
    written to ``logs_path`` per mini-batch and one progress line is printed
    per epoch.

    Args:
        xtrain: Training features. Sliced along the first axis into chunks of
            ``BATCH_SIZE`` rows and fed to a ``[None, FEAT_DIM]`` placeholder.
        ytrain: Training labels, sliced in lockstep with ``xtrain``.
            Presumably one-hot encoded, since ``argmax`` over axis 1 is taken
            for the accuracy -- confirm against the caller.
        odir: Not used by the portion of the function shown here.

    Returns:
        Nothing is returned by the portion of the function shown here.
    """
    learning_rate = 0.0008
    epoch_iter = 100

    ## Number of full mini-batches per epoch. Trailing samples that do not
    ## fill a whole batch are never used.
    #
    batches_per_epoch = len(xtrain) // BATCH_SIZE

    ## input/ output placeholders where data would be plugged in...
    #
    x = tf.placeholder('float', [None, FEAT_DIM], name="input")
    y_ = tf.placeholder('float', name="output")

    ## define the network
    #
    logits = neural_network_model(x)
    ## softmax normalizes the output results. The tensor is not used below
    ## but it is kept so that the named op exists in the graph.
    #
    prediction = tf.nn.softmax(logits, name="op_to_restore")
    loss = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(logits = logits, labels = y_) )

    ## Major OP for the training procedure. The "train" op defined here tries to minimize loss
    #
    with tf.name_scope('ADAM'):
        optimizer = tf.train.AdamOptimizer(learning_rate)
        train = optimizer.minimize(loss)

    with tf.name_scope('Accuracy'):
        ## Accuracy calculation by comparing the predicted and expected labels
        #
        acc = tf.equal(tf.argmax(logits, 1), tf.argmax(y_, 1))
        acc = tf.reduce_mean(tf.cast(acc, tf.float32))

    ## summary and display variables
    #
    loss_sum = tf.summary.scalar("loss", loss)
    acc_sum = tf.summary.scalar("accuracy", acc)

    ## Merge all summaries into a single op. These summaries will be displayed using Tensorboard
    #
    merged_summary_op = tf.summary.merge([loss_sum, acc_sum])

    ## create a session for the graph (graph initialization)
    #
    with tf.Session() as sess:
        ## initialize all the variables. Before this point the variables hold no values.
        #
        sess.run(tf.global_variables_initializer())

        ## initialize the summary writer (For tensorboard)
        #
        summary_writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph())
        try:
            ## iterate over epochs (complete forward-backward for the entire training set)
            #
            for epoch in range(epoch_iter):
                ## running totals for this epoch
                #
                epoch_loss = 0
                epoch_accuracy = 0

                ## minibatch training, always in the same order (no shuffling)
                #
                for i in range(batches_per_epoch):
                    start = i * BATCH_SIZE
                    epoch_x = xtrain[start : start + BATCH_SIZE]
                    epoch_y = ytrain[start : start + BATCH_SIZE]

                    ## one optimisation step; also fetch the batch accuracy, batch loss
                    ## and the merged summary for this batch
                    #
                    _, ac, ls, summary = sess.run([train, acc, loss, merged_summary_op], feed_dict = {x: epoch_x, y_: epoch_y})

                    ## write the summary to the logs, using the global batch index as the step
                    #
                    summary_writer.add_summary(summary, epoch * batches_per_epoch + i)

                    ## update stats
                    #
                    epoch_loss += ls
                    epoch_accuracy += ac

                ## NOTE: the reported loss is the SUM of the batch losses, and the
                ## reported accuracy is that of the LAST mini-batch only
                ## (epoch_accuracy is accumulated but not reported).
                #
                print ("Epoch ", epoch, " completed out of ", epoch_iter, " loss: ", epoch_loss, "accuracy: ", ac)
        finally:
            ## always flush and release the event file, even if training fails
            #
            summary_writer.close()

        ## saver module to save tf graph variables.. etc....
我复制结果的 Keras 脚本如下所示:
# Seed NumPy's global RNG and TensorFlow's graph-level RNG at the very top of
# the script, before Keras or any other module is imported.
from numpy.random import seed
seed(1)
from tensorflow import set_random_seed
set_random_seed(2)
## import system modules
#
import os
import sys
## import ML and datatype modules
#
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.callbacks import ModelCheckpoint
from keras.optimizers import SGD
from keras.utils import to_categorical
from sklearn import preprocessing
## Default constants
#
# Width of the softmax output layer (number of target classes).
NO_OF_CLASSES = 2
# Mini-batch size constant (same value as in the TensorFlow script).
BATCH_SIZE = 32
# Number of input features per sample (input_dim of the first Dense layer).
FEAT_DIM = 26
# Number of units in hidden layers 1, 2 and 3 respectively.
N_nodes_hl1 = 300
N_nodes_hl2 = 30
N_nodes_hl3 = 30
## This method defines the NN architecture as well as performs training and saves the model info
#
def train_neural_network(xtrain, ytrain, odir):
    """Define a three-hidden-layer MLP in Keras and fit it with plain SGD.

    The network mirrors the layer sizes given by the module constants:
    three ReLU ``Dense`` layers followed by a softmax ``Dense`` layer with
    ``NO_OF_CLASSES`` units, trained on categorical cross-entropy.

    Args:
        xtrain: Training features with ``FEAT_DIM`` columns.
        ytrain: Training labels. Presumably one-hot encoded, as required by
            the categorical cross-entropy loss -- confirm against the caller.
        odir: Not used by the code shown here.

    Returns:
        None.
    """
    learning_rate = 0.009

    ## Define the network (MLP)
    #
    model = Sequential()
    model.add(Dense(N_nodes_hl1, input_dim=FEAT_DIM, activation="relu"))
    model.add(Dense(N_nodes_hl2, activation="relu"))
    model.add(Dense(N_nodes_hl3, activation="relu"))
    model.add(Dense(NO_OF_CLASSES, activation="softmax"))

    ## optimizer
    #
    sgd = SGD(lr=learning_rate)
    model.compile(loss="categorical_crossentropy", optimizer=sgd, metrics=['accuracy'])

    ## summary() prints the layer table itself and returns None, so it is
    ## called directly instead of being wrapped in a print statement.
    #
    model.summary()

    ## train the model. The batch size is passed explicitly; it equals the
    ## Keras default of 32, so the training schedule is unchanged.
    ## NOTE(review): `shuffle` is left at its default, which Keras documents
    ## as shuffling the training data before each epoch -- confirm whether
    ## that is intended when run-to-run reproducibility is the goal.
    #
    model.fit(x=xtrain, y=ytrain, batch_size=BATCH_SIZE, epochs=100)
Keras 实验 1:loss: 0.5964 - acc: 0.6725
Keras 实验 2:loss: 0.5974 - acc: 0.6712
两个脚本之间的唯一区别是优化器。我认为这不会在训练期间引入任何随机性。此外,我相信 NN 架构应该产生相同的结果,在 CPU 上精度高达 float64(由于硬件功能,在 GPU 上精度高达 float32)。
我的 Keras 脚本中缺少什么?另外,如果我的理解在此查询中的某处有误,请纠正我。
除此之外,将高度赞赏有关如何复制 NN 结果的其他参考资料(以下除外)。
https://machinelearningmastery.com/reproducible-results-neural-networks-keras/
解决方案
推荐阅读
- c# - c# Naaudio音频电平捕获和显示,仅在打开录音属性时有效
- javascript - 多语言验证处理逻辑
- javascript - 如何动态更改 logoURL?
- python - 在python中对不同的列进行分组
- jquery - 如何通过下拉列表的变化根据数据属性对div进行排序
- qt - Qt QTreeView indexBelow 不起作用
- ios - 无法通过whatsapp分享pdf
- javascript - 为什么无论代码中的顺序如何,alert() 对话框都会首先显示?
- python - Django-,在模型表单中显示和过滤来自外键的特定字段
- python - 如何使用python对图像进行base64编码