Keras image semantic segmentation gives repetitive output (SegNet basic model)

Problem description

I am trying to do semantic segmentation of satellite images using Keras with the TensorFlow backend, and I am using the SegNet basic model for the segmentation.
I am using Python 3.6.

My question is about the repetitive pattern I get in the output image, regardless of the input image. What am I doing wrong, and how do I fix it? What should my next steps be? My model also takes more than an hour to train; is that normal?

Details about my problem and the full code are below.

import random
import tifffile
import cv2 as cv
import numpy as np
from keras.utils import to_categorical
from keras.callbacks import EarlyStopping
from keras.models import *
from keras.layers import *
from keras import backend as K

I am using 1600 images of size 256x256 as the training set.

# Read the large GeoTIFF mosaic and its mask (raw strings avoid backslash escapes in Windows paths)
image1 = tifffile.imread(r"D:\Programs\Ankit\satellite8.tif")
image2 = tifffile.imread(r"D:\Programs\Ankit\satellite8w.tif")
cv.imwrite("image1.jpg", image1)
cv.imwrite("image2.jpg", image2)
image3 = cv.imread(r"D:\Programs\Ankit\image1.jpg")
dim1 = image3.shape
dim2 = image2.shape
size = 256
ints1 = list(range(0,dim1[0],size))
ints2 = list(range(0,dim1[1],size))
print (len(ints1))
print (len(ints2))
print(dim1, dim2)
print(ints1)
print(ints2)

i = len(ints1)
j= len(ints2)  

My input image and mask image (the label) are GeoTIFFs that are too large to include here.
The following code iteratively reads 256x256 tiles from the mosaic.

img = np.zeros(((i-1)*(j-1),size,size,3))
print(img.shape)
m=1
for k in range(1,i):
    for n in range(1,j):
        img[m-1, :, :, :] = image3[ints1[k-1]:ints1[k], ints2[n-1]:ints2[n], 0:3]
        m += 1
#print(m)
print(img.shape)

m = 1
clt =np.zeros(((i-1)*(j-1),size,size))
for k in range(1,i):
    for n in range(1,j):
        clt[m-1, :,:] = image2[ints1[k-1]:ints1[k],ints2[n-1]:ints2[n]]
        m += 1
    #print(m)
print(clt.shape)
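For reference, the same tiling can also be done without the nested loops by cropping the mosaic to a multiple of the tile size and reshaping. This is a minimal sketch (not part of my original code) that reuses image3, image2, and size from above:

H = (image3.shape[0] // size) * size
W = (image3.shape[1] // size) * size

# Split rows and columns into (blocks, size) pairs, then move the block axes together
img = (image3[:H, :W, :3]
       .reshape(H // size, size, W // size, size, 3)
       .transpose(0, 2, 1, 3, 4)
       .reshape(-1, size, size, 3))

clt = (image2[:H, :W]
       .reshape(H // size, size, W // size, size)
       .transpose(0, 2, 1, 3)
       .reshape(-1, size, size))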

#Setting train and test data

train_X = img[0:1600,:,:,:]
test_X = img[1600:,:,:,:]
train_y = clt[0:1600,:,:]
test_y = clt[1600:,:,:]  

train_labels = train_y.reshape(1600,size*size)
print(train_labels.shape)

train_labels = to_categorical(train_labels)
print(train_labels.shape)
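As a side note, a minimal sketch of the same one-hot step, assuming the mask values are the integers 0..4: passing num_classes explicitly keeps the one-hot depth equal to the model's nClasses output channels even if some class happens to be absent from the 1600 training masks (otherwise to_categorical infers the depth as max(label) + 1).

# num_classes=5 matches the nClasses variable defined below
train_labels = to_categorical(train_y.reshape(1600, size * size), num_classes=5)
print(train_labels.shape)  # expected (1600, 65536, 5)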

Variables for the SegNet basic model:

early_stopping_monitor = EarlyStopping(patience=3)
kernel = 3
filter_size = 64
pad = 1
pool_size = 2
input_height = size
input_width = size
nClasses =5

My variant of the SegNet basic model is as follows:

model = Sequential()
model.add(Layer(input_shape=(input_height , input_width, 3)))

# encoder
#model.add(ZeroPadding2D(padding=(pad,pad)))
model.add(Conv2D(filter_size, (kernel, kernel), padding='same'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(pool_size, pool_size)))

#model.add(ZeroPadding2D(padding=(pad,pad)))
model.add(Conv2D(128, (kernel, kernel), padding='same'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(pool_size, pool_size)))

#model.add(ZeroPadding2D(padding=(pad,pad)))
model.add(Conv2D(256, (kernel, kernel), padding='same'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(pool_size, pool_size)))

model.add(Conv2D(512, (kernel, kernel), padding = 'same'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(pool_size, pool_size)))

model.add( Conv2D(512, (kernel, kernel), padding = 'same'))
model.add( BatchNormalization())
model.add( UpSampling2D(size=(pool_size,pool_size)))

model.add( Conv2D(256, (kernel, kernel), padding='same'))
model.add( BatchNormalization())

model.add( UpSampling2D(size=(pool_size,pool_size)))
#model.add( ZeroPadding2D(padding=(pad,pad)))
model.add( Conv2D(128,(kernel, kernel), padding='same'))
model.add( BatchNormalization())

model.add( UpSampling2D(size=(pool_size,pool_size)))
#model.add( ZeroPadding2D(padding=(pad,pad)))
model.add( Conv2D(filter_size,(kernel, kernel), padding='same'))
model.add( BatchNormalization())
model.add( UpSampling2D(size=(pool_size,pool_size)))

model.add(Conv2D( nClasses , (1, 1), padding='same',))

model.outputHeight = model.output_shape[-2]
model.outputWidth = model.output_shape[-1]

model.add(Reshape((nClasses, model.output_shape[2] * model.output_shape[1]),
                  input_shape=(nClasses, model.output_shape[-2], model.output_shape[-1])))
model.add(Permute((2, 1)))
model.add(Activation('softmax'))
#model.add(Dense(3))

#model.add(Reshape((256, 256, 3), input_shape=(nClasses, model.output_shape[-2], model.output_shape[-1])))
model.compile(loss="categorical_crossentropy", optimizer='adadelta', metrics=['accuracy'])
model.summary()

model.fit(train_X, train_labels, epochs=1, verbose=1,
          callbacks=[early_stopping_monitor], validation_split=0.2, shuffle=True)
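For reference only, and not part of my original code: with the TensorFlow backend the Conv2D layers above produce channels_last tensors, so the final 1x1 convolution emits (batch, input_height, input_width, nClasses). A minimal sketch of how the classification head reads when written for that layout (using the same variable names) would be:

# Channels_last head: flattening the two spatial axes directly gives
# (batch, H*W, nClasses), so each row already holds one pixel's class
# scores and the softmax applies per pixel without a Permute.
model.add(Conv2D(nClasses, (1, 1), padding='same'))
model.add(Reshape((input_height * input_width, nClasses)))
model.add(Activation('softmax'))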

The model takes more than an hour to train and gives about 25% validation accuracy. The prediction part and post-processing are as follows:

ypreds = model.predict(test_X, verbose = 1)
print(ypreds.shape)
#ypreds.reshape(2,256,256,17)
#print(ypreds.shape)

pred1 = ypreds[3,0:size*size,0:nClasses]
pred1 = pred1.reshape(size,size,nClasses)


pred = np.argmax(pred1, axis = 2)
colors = [(random.randint(0, 255), random.randint(0, 255), random.randint(0, 255)) for _ in range(nClasses)]
seg_img = np.zeros( ( size, size, 3  ) )
for c in range(nClasses):
    seg_img[:,:,0] += ( (pred[:,: ] == c )*( colors[c][0] )).astype('uint8')
    seg_img[:,:,1] += ((pred[:,: ] == c )*( colors[c][1] )).astype('uint8')
    seg_img[:,:,2] += ((pred[:,: ] == c )*( colors[c][2] )).astype('uint8')
seg_img = cv.resize(seg_img , (size, size))
cv.imwrite(  "pred.jpg" , seg_img )
print(seg_img.shape)
print(seg_img)
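As an aside (not in my original code), the per-class colouring loop above can also be written as a single lookup-table indexing step. A minimal sketch reusing colors and pred from above; the output filename is just an example:

# Build a (nClasses, 3) colour lookup table and index it with the class map;
# fancy indexing turns the (size, size) pred array into a (size, size, 3) image.
palette = np.array(colors, dtype=np.uint8)
seg_img2 = palette[pred]
cv.imwrite("pred_lut.jpg", seg_img2)  # example filename, not from the original post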

Am I missing some step after this?
My output image is shown below.

[Output prediction image]

Tags: python, tensorflow, keras, image-segmentation, semantic-segmentation

Solution

