首页 > 解决方案 > 预期 input_5 有 4 个维度,但得到了形状为 (24, 1) 的数组

问题描述

由于我之前的预处理问题,我决定采用一种新方法来预处理数据集:


import os.path
import os
import tqdm
import glob
from imutils import paths
from sklearn.preprocessing import LabelEncoder

def getListOfFiles(dirName):
    listOfFile = os.listdir(dirName)
    listOfFile.sort()
    allFiles = list()
    for entry in listOfFile:
        fullPath = os.path.join(dirName, entry)
        if os.path.isdir(fullPath):
            allFiles = allFiles + getListOfFiles(fullPath)
        else:
            allFiles.append(fullPath)             
    return allFiles        

def main():  
    dirName = '/content/drive/My Drive/casia-299-small'
    save_path = '/content/drive/My Drive/face_recognition/inceptionv2'
    listOfFiles = getListOfFiles(dirName)
    completeName = os.path.join(save_path, "to_train"+".txt")
    completeName2 = os.path.join(save_path, "to_test"+".txt")

    file_to_write = open(completeName, "w")
    for file in listOfFiles: 
        string_name = file
        file_to_write.write(string_name)
        file_to_write.write('\n')
    file_to_write.close()


    file_to_write = open(completeName2, "w")
    for file in listOfFiles:
        label = file.split(os.path.sep)[-2]
        file_to_write.write(label)
        file_to_write.write('\n')
    file_to_write.close()


if __name__ == '__main__':
    main()

然后我使用文件to_train.txtto_test.txt两个大小为(图像数量,1)的数组,表示数据集中每个图像的路径和每个图像的标签。我将它们提供给我的 CNN 进行如下训练:

# Model 2 try (my)
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from imutils import paths
from keras.applications.inception_resnet_v2 import InceptionResNetV2
from keras.applications import imagenet_utils
from keras.preprocessing.image import img_to_array
from keras.preprocessing.image import load_img
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import SGD
from keras.optimizers import RMSprop
from keras.layers import Input
from keras.models import Model
from keras.layers.core import Dense
from keras.layers.core import Dropout
from keras.layers.core import Flatten 
import numpy as np 
import random 
import os
from keras.applications.inception_resnet_v2 import InceptionResNetV2
import numpy as np 
import random 
import os
import cv2
import pickle 
from keras import backend as K 
import tensorflow as tf
from keras.models import Sequential, Model
from keras.layers import *
from keras.optimizers import *
from keras import applications
import h5py


images = []
labels = []

image_path = "/content/drive/My Drive/face_recognition/inceptionv2/to_train.txt"
label_path = "/content/drive/My Drive/face_recognition/inceptionv2/to_test.txt"

with open(image_path) as fp:
    line = fp.readline()
    while line:
        line = fp.readline()
        my_string = line.replace("\n", "")
        images.append(my_string)

with open(label_path) as fp:
    line = fp.readline()
    while line:
        line = fp.readline()
        my_string = line.replace("\n", "")
        labels.append(my_string)

print("Images: ", images)
print("Labels: ", labels)


x_train, x_test, y_train, y_test = train_test_split(images, labels, test_size = 0.2, random_state = 42)

print(np.shape(x_train), np.shape(x_test), np.shape(y_train), np.shape(y_test))

def convnet_model_():
    initial_inceptionv2_model = InceptionResNetV2(weights=None, include_top = False, input_shape = (299, 299, 3))
    x = initial_inceptionv2_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(4096, activation = 'relu')(x)
    x = Dropout(0.6)(x)
    x = Dense(4096, activation = 'relu')(x)
    x = Dropout(0.6)(x)
    x = Lambda(lambda x: K.l2_normalize(x, axis = 1))(x)
    convnet_model = Model(inputs=initial_inceptionv2_model.input, outputs = x)
    return convnet_model

def deep_rank_model():
    convnet_model = convnet_model_()

    first_input = Input(shape = (299, 299, 3))
    first_conv = Conv2D(96, kernel_size = (8, 8), strides = (16, 16), padding = 'same')(first_input)
    first_max = MaxPool2D(pool_size = (3, 3), strides = (2, 2), padding = 'same')(first_conv)
    first_max = Flatten()(first_max)
    first_max = Lambda(lambda x: K.l2_normalize(x, axis =1))(first_max)

    second_input = Input(shape = (299, 299, 3))
    second_conv = Conv2D(96, kernel_size = (8, 8), strides = (32, 32), padding = 'same')(second_input)
    second_max = MaxPool2D(pool_size = (7, 7), strides = (4, 4), padding = 'same')(second_conv)
    second_max = Flatten()(second_max)
    second_max = Lambda(lambda x: K.l2_normalize(x, axis = 1))(second_max)

    merge_one = concatenate([first_max, second_max])
    merge_two = concatenate([merge_one, convnet_model.output])
    emb = Dense(4096)(merge_two)
    emb = Dense(128)(emb)
    l2_norm_final = Lambda(lambda x: K.l2_normalize(x, axis = 1))(emb)

    final_model = Model(inputs = [first_input, second_input, convnet_model.input], outputs = l2_norm_final)
    return final_model

deep_rank_model = deep_rank_model()
deep_rank_model.summary()

batch_size = 24
_EPSILON = K.epsilon()
def _loss_tensor(y_true, y_pred):
    y_pred = K.clip(y_pred, _EPSILON, 1.0 - _EPSILON)
    loss = 0.
    g = 1. 
    for i in range(0, batch_size, 3):
        try:
            q_embedding = y_pred[i]
            p_embedding = y_pred[i+1]
            n_embedding = y_pred[i+2]
            D_q_p = K.sqrt(K.sum((q_embedding - p_embedding)**2))
            D_q_n = K.sqrt(K.sum((q_embedding - n_embedding)**2))
            loss = loss + g + D_q_p - D_q_n

        except:
            continue

    loss = loss/batch_size*3
    return K.maximum(loss, 0)

print("Compile: ")
deep_rank_model.compile(loss=_loss_tensor, optimizer = SGD(lr = 0.001, momentum=0.9, nesterov=True))

def image_batch_generator(images, labels, batch_size):
    labels = np.array(labels)
    while True:
        batch_paths = np.random.choice(a = len(images), size = batch_size//3)
        input_1 = []

        for i in batch_paths:
            pos = np.where(labels == labels[i])[0]
            neg = np.where(labels != labels[i])[0]

            j = np.random.choice(pos)
            while j == i:
                j = np.random.choice(pos)

            k = np.random.choice(neg)
            while(k == i):
                k = np.random.choice(neg)

            input_1.append(cv2.imread(images[i]))
            input_1.append(cv2.imread(images[j]))
            input_1.append(cv2.imread(images[k]))

        input_1 = np.array(input_1)
        input = [input_1, input_1, input_1]
        print("Input shape: ": , np.shape(input))
        yield (input, np.zeros((batch_size, )))

from keras.callbacks import ModelCheckpoint

filePath = '/content/drive/My Drive/casia-299-small/model_2_try_2.h5'
checkpoint = ModelCheckpoint(filePath, monitor = 'loss', verbose = 1, save_weights_only=False, save_best_only=True, mode = 'min')
callbacks_list = [checkpoint]

deep_rank_model.fit_generator(generator=image_batch_generator(x_train, y_train, batch_size), 
                              steps_per_epoch = len(x_train)//batch_size, 
                              epochs = 20, 
                              verbose = 1, 
                              callbacks = callbacks_list)

然后在训练过程中,我遇到了这个新问题: ValueError: Error when checking input: expected input_5 to have 4 dimensions, but got array with shape (24, 1)

我的代码有问题吗?我真的不确定。任何有帮助的人都是一个很大的赞赏。

更新:在训练过程中,我刚刚注意到并非所有输出都会出现错误:我print("Shape of input: ", np.shape(input))在函数中使用image_batch_generator

错误截图

所以我猜有问题cv2.imread(),这并不总是成功读取图像。我想知道是否有人发现了这个问题?我现在很迷茫?

标签: pythonkerasdeep-learningcomputer-vision

解决方案


在您的脚本中使用的 Conv2D 需要一个具有四个维度的输入形状,这意味着您需要使用numpy.reshape()将具有 (24,1) 维度的数组更改为 (1,24,1,3) 之类的东西(3 表示 RGB 图像)。这是 numpy.reshape() 文档的链接。

https://docs.scipy.org/doc/numpy/reference/generated/numpy.reshape.html


推荐阅读