python - Tensorflow 图像分类损失 NAN
问题描述
所以,我拿了一个图像分类的例子,我试图改变它以适应我的数据类型。我有 2 个类别(但将来可能会更多),包括汽车和其他(其他的意思是别的)。每个图像都是 200x200 RGB,我在这里有一个问题:
我的损失将一直是NAN,与时代的数量无关。
代码
from _future_ import absolute_import
from _future_ import division
from _future_ import print_function
import time
import math
import numpy as np
from PIL import Image
import tensorflow as tf
import os
# Basic model parameters as external flags.
flags = tf.flags
FLAGS = flags.FLAGS
flags.DEFINE_float('learning_rate', 0.001, 'Initial learning rate.')
flags.DEFINE_integer('max_steps', 2000, 'Number of steps to run trainer.')
flags.DEFINE_integer('hidden1', 128, 'Number of units in hidden layer 1.')
flags.DEFINE_integer('hidden2', 32, 'Number of units in hidden layer 2.')
flags.DEFINE_integer('batch_size', 128, 'Batch size. '
'Must divide evenly into the dataset sizes.')
flags.DEFINE_string('train_dir', os.path.abspath("ModelData"), 'Directory to put the training data.')
flags.DEFINE_boolean('fake_data', False, 'If true, uses fake data '
'for unit testing.')
NUM_CLASSES = 2
IMAGE_SIZE = 200
CHANNELS = 3
IMAGE_PIXELS = IMAGE_SIZE * IMAGE_SIZE * CHANNELS
def inference(images, hidden1_units, hidden2_units):
# Hidden 1
with tf.name_scope('hidden1'):
weights = tf.Variable(
tf.truncated_normal([IMAGE_PIXELS, hidden1_units],
stddev=1.0 / math.sqrt(float(IMAGE_PIXELS))),
name='weights')
biases = tf.Variable(tf.zeros([hidden1_units]),
name='biases')
hidden1 = tf.nn.relu(tf.matmul(images, weights) + biases)
# Hidden 2
with tf.name_scope('hidden2'):
weights = tf.Variable(
tf.truncated_normal([hidden1_units, hidden2_units],
stddev=1.0 / math.sqrt(float(hidden1_units))),
name='weights')
biases = tf.Variable(tf.zeros([hidden2_units]),
name='biases')
hidden2 = tf.nn.relu(tf.matmul(hidden1, weights) + biases)
# Linear
with tf.name_scope('softmax_linear'):
weights = tf.Variable(
tf.truncated_normal([hidden2_units, NUM_CLASSES],
stddev=1.0 / math.sqrt(float(hidden2_units))),
name='weights')
biases = tf.Variable(tf.zeros([NUM_CLASSES]),
name='biases')
logits = tf.matmul(hidden2, weights) + biases
return logits
def cal_loss(logits, labels):
labels = tf.to_int64(labels)
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels, name='xentropy')
loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')
return loss
def training(loss, learning_rate):
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
global_step = tf.Variable(0, name='global_step', trainable=False)
train_op = optimizer.minimize(loss, global_step=global_step)
return train_op
def evaluation(logits, labels):
correct = tf.nn.in_top_k(logits, labels, 1)
return tf.reduce_sum(tf.cast(correct, tf.int32))
def placeholder_inputs(batch_size):
images_placeholder = tf.placeholder(tf.float32, shape=(batch_size, IMAGE_PIXELS))
labels_placeholder = tf.placeholder(tf.int32, shape=batch_size)
return images_placeholder, labels_placeholder
def fill_feed_dict(images_feed, labels_feed, images_pl, labels_pl):
feed_dict = {
images_pl: images_feed,
labels_pl: labels_feed,
}
return feed_dict
def do_eval(sess, eval_correct, images_placeholder, labels_placeholder, data_set):
# And run one epoch of eval.
true_count = 0 # Counts the number of correct predictions.
steps_per_epoch = 128 // FLAGS.batch_size
num_examples = steps_per_epoch * FLAGS.batch_size
for step in range(steps_per_epoch):
feed_dict = fill_feed_dict(train_images, train_labels, images_placeholder, labels_placeholder)
true_count += sess.run(eval_correct, feed_dict=feed_dict)
precision = true_count / num_examples
print(' Num examples: %d Num correct: %d Precision @ 1: %0.04f' % (num_examples, true_count, precision))
# Get the sets of images and labels for training, validation, and
def init_training_data_set(dir):
train_images = []
train_labels = []
def GetFoldersList():
mylist = []
filelist = os.listdir(dir)
for name in filelist:
if os.path.isdir(os.path.join(dir, name)):
mylist.append(name)
return mylist
def ReadImagesFromFolder(folder):
fin_dir = os.path.join(dir, folder)
images_name = os.listdir(fin_dir)
images = []
for img_name in images_name:
img_location = os.path.join(dir, folder)
final_loc = os.path.join(img_location, img_name)
try:
import hashlib
hash_folder = int(hashlib.md5(folder.encode()).hexdigest(), 16) % (10 ** 8 )
images.append((np.array(Image.open(final_loc).convert('RGB')), hash_folder))
except:
pass
return images
folders = GetFoldersList()
for folder in folders:
for imgs in ReadImagesFromFolder(folder):
train_images.append(imgs[0])
train_labels.append(imgs[1])
return train_images, train_labels
train_images, train_labels = init_training_data_set(os.path.join("FetchData", "Image"))
train_images = np.array(train_images)
train_images = train_images.reshape(len(train_images), IMAGE_PIXELS)
train_labels = np.array(train_labels)
def run_training():
# Tell TensorFlow that the model will be built into the default Graph.
with tf.Graph().as_default():
# Generate placeholders for the images and labels.
images_placeholder, labels_placeholder = placeholder_inputs(len(train_images))
# Build a Graph that computes predictions from the inference model.
logits = inference(images_placeholder, FLAGS.hidden1, FLAGS.hidden2)
# Add to the Graph the Ops for loss calculation.
loss = cal_loss(logits, labels_placeholder)
# Add to the Graph the Ops that calculate and apply gradients.
train_op = training(loss, FLAGS.learning_rate)
# Add the Op to compare the logits to the labels during evaluation.
eval_correct = evaluation(logits, labels_placeholder)
# Create a saver for writing training checkpoints.
saver = tf.train.Saver()
# Create a session for running Ops on the Graph.
sess = tf.Session()
# Run the Op to initialize the variables.
init = tf.global_variables_initializer()
sess.run(init)
# And then after everything is built, start the training loop.
for step in range(FLAGS.max_steps):
start_time = time.time()
feed_dict = fill_feed_dict(train_images, train_labels, images_placeholder, labels_placeholder)
_, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
duration = time.time() - start_time
print("Current step is: " + str(step))
print("Current los value: " + str(loss_value))
print("Current duration: " + str(duration))
print("\n")
if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
saver.save(sess, FLAGS.train_dir, global_step=step)
print('Training Data Eval:')
do_eval(sess, eval_correct, images_placeholder, labels_placeholder, train_images)
def main(_):
run_training()
if _name_ == '_main_':
tf.app.run()
另外,我应该提到我已经搜索过这个损失 NAN并且我发现它应该与学习率有一些共同点,所以我已经玩了一点但没有任何改变(我必须改变它什么0.1
都0.0001
没有……)。
因此,如果你们中的任何人知道如何解决此问题以及如何优化此问题或使其简单,请告诉我。
解决方案
推荐阅读
- javascript - 如何导入设置为函数的 live let 变量?
- android-studio - Android Studio 占用大量内存
- javascript - React - 将道具传递给孩子
- php - 第一个 PHP 表单似乎不起作用
- python - 未能在我的网络抓取工具中执行“显示更多评论”
- python - 如何在 Pandas 中读取 Excel,保持没有 NaN 的混合类型列?
- github - 在合并之前请求用户进行(某种)流程验证
- jenkins - 詹金斯文件 | 多分支管道
- ruby-on-rails - Rails API / react / axios 下载损坏的文件
- sql-server - 页面拆分时的读取一致性