python - 用张量流训练模型,但损失不会下降?
问题描述
我正在训练一个对 62 个字母数字字符进行分类的模型。但是损失值在前几批中首先急剧下降,然后落地,再也没有下降。我不知道哪里错了,也不知道如何调试模型。
这是训练日志的快照:
以下是训练数据的示例:
我使用 4 个 conv 层,后跟 1 个 fc 层,adam 优化器以最大限度地减少日志损失。我仔细检查了图像标签是否正确。所以我不知道还有哪里错了。
这是代码:
import numpy as np
import tensorflow as tf
import os
from PIL import Image
import shutil
import time
input = temp= tf.placeholder(dtype='float32', shape=(None,32,32,1), name='input')#(None,62)
label = tf.placeholder(dtype='float32',shape=(None,62))#(None,62)
temp = tf.layers.conv2d(inputs=temp,filters=32,kernel_size=(3,3),padding="SAME",activation=tf.nn.relu,kernel_initializer=tf.keras.initializers.he_normal())#(None,32,32,32)
#temp = tf.layers.dropout(inputs=temp,rate=0.5)
temp = tf.layers.max_pooling2d(temp,pool_size=[2, 2], strides=2)#(None,16,16,32)
temp = tf.layers.conv2d(inputs=temp,filters=64,kernel_size=(3,3),padding="SAME",activation=tf.nn.relu,kernel_initializer=tf.keras.initializers.he_normal())#(None,16,16,64)
#temp = tf.layers.dropout(inputs=temp,rate=0.2)
temp = tf.layers.max_pooling2d(temp,pool_size=[2, 2], strides=2)#(None,8,8,64)
temp = tf.layers.conv2d(inputs=temp,filters=128,kernel_size=(3,3),padding="SAME",activation=tf.nn.relu,kernel_initializer=tf.keras.initializers.he_normal())#(None,8,8,128)
temp = tf.layers.dropout(inputs=temp,rate=0.2)
temp = tf.layers.max_pooling2d(temp,pool_size=[2, 2], strides=2)#(None,4,4,128)
temp = tf.layers.conv2d(inputs=temp,filters=256,kernel_size=(3,3),padding="SAME",activation=tf.nn.relu,kernel_initializer=tf.keras.initializers.he_normal())#(None,4,4,256)
temp = tf.layers.dropout(inputs=temp,rate=0.2)
temp = tf.layers.max_pooling2d(temp,pool_size=[2, 2], strides=2)#(None,2,2,256)
temp = tf.layers.conv2d(inputs=temp,filters=62,kernel_size=(2,2),padding="VALID",kernel_initializer=tf.keras.initializers.he_normal())#fc (None,1,1,62)
output = temp = tf.layers.flatten(temp)#(None,62)
output = tf.nn.softmax(output)
#loss
output_clip = tf.clip_by_value(output,1e-7,1-1e-7)
loss = tf.reduce_mean(tf.reduce_sum(-label*tf.log(output_clip)-(1-label)*tf.log(1-output_clip),axis=-1))#scaler
optimizer = tf.train.AdamOptimizer().minimize(loss)
#accuracy
indexoutput = tf.argmax(output,axis=-1)#(None,)
labelindex = tf.argmax(label,axis=-1)#(None,)
equals = tf.equal(indexoutput,labelindex)#(None,)
equals = tf.reduce_sum(tf.cast(equals,dtype='int8'),axis=-1)#scaler
acc = tf.cast(equals,dtype='float32')/tf.cast(tf.shape(output)[0],dtype='float32')#scaler
def train(epochs):
saver = tf.train.Saver()
lossrec=[]
accrec = []
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
#saver.restore(sess, os.path.join(os.getcwd(),'model_logloss','captchabreak.ckpt'))
valimg,vallabel = next(validategenerator(os.path.join(os.getcwd(),'tests')))
for i in range(epochs):
for j, (trainimg, trainlabel) in enumerate(traingenerator(os.path.join(os.getcwd(),'pics'),32)):
_, trainacc, trainloss = sess.run([optimizer, acc, loss],feed_dict={input: trainimg, label: trainlabel});
valacc, valloss = sess.run([acc, loss], feed_dict={input: np.array(valimg), label: np.array(vallabel)})
print("epoch:{} batch:{} trainloss:{:.4f} validateloss:{:.4f} trainacc:{:.2f} validateacc:{:.2f}"
.format(i, j, trainloss, valloss, trainacc, valacc))
#keep some logs
lossrec.append(valloss)
accrec.append(valacc)
print(lossrec);
print(accrec)
if len(lossrec) >= 3 and valloss >= lossrec[-2] and valloss >= lossrec[-3]: break;
shutil.rmtree("model_logloss")
saver.save(sess, "model_logloss/captchabreak.ckpt")
def traingenerator(path,batch_size):
fs = os.listdir(path);
fs=np.random.permutation(fs)
if batch_size == 0:
batch_size = len(fs)
offset=0
while offset<len(fs):
yield fetch(fs[offset:offset+batch_size],path)
offset+=batch_size
def validategenerator(path):
fs = os.listdir(path);
yield fetch(fs,path)
def fetch(fs,path):
imgs = []
labels = []
for i, fname in enumerate(fs):
fp = os.path.join(path, fname)
imp = Image.open(fp).resize((32, 32));
imp = imp.convert('L')
imp = imp.point(lambda p: p > 210 and 255)
im = np.array(imp)
im = np.expand_dims(im, axis=-1)
imp.close()
c = fname[0]
lb = np.zeros((62))
if ord(c) >= 48 and ord(c) <= 57:
lb[ord(c) - 48] = 1
if ord(c) >= 65 and ord(c) <= 90:
lb[ord(c) - 65 + 10] = 1
if ord(c) >= 97 and ord(c) <= 122:
lb[ord(c) - 97 + 36] = 1
imgs.append(im)
labels.append(lb)
return np.array(imgs), np.array(labels)
if __name__ == "__main__":
train(30)
解决方案
解决了。我忘记将像素值除以 255。
推荐阅读
- elasticsearch - 将字段文本重新索引为日期会导致错误“拒绝将映射更新到...”
- typescript - 如何以角度 2 调用 wordpress 博客 api
- checkbox - 以编程方式取消选中 Vaadin 上下文菜单中的复选框
- python - 我可以通过同一类的静态方法访问和修改类属性
- aws-lambda - 从另一个 lambda 调用 lambda 中的特定事件
- visual-studio-code - 如何添加键绑定以在 VSCode 中打开特定的终端配置文件?
- java - 为什么我需要使用 WebDriver 而不是 ChromeDriver
- python - 如何使用 seaborn.relplot 绘制宽格式数据框
- reactjs - Nextjs auth 使用外部端点的保护路由设置
- c# - 如果没有自己的 ShapeRepresentation,IfcRoof 将无法工作