首页 > 解决方案 > 使用 Keras 的 ImageDataGenerator 训练模型时出现 OSError: -2

问题描述

#Now we will create a baseline CNN model similar to LeNet and below is utility class for BASELINE MODEL:

# Number of images available in the training and validation splits.
nb_samples_train, nb_samples_val = 319835, 39995

# Root folders of the validation and training images.
val_dir = r'C:\Users\Ashu\Desktop\AAIC\Data\val'
train_dir = r'C:\Users\Ashu\Desktop\AAIC\Data\train'

class baseLINE_MODEL:
    """Baseline LeNet-style CNN trained from image directories.

    The image size, number of epochs and batch size given to the constructor
    drive both the network input shape and the directory generators.
    """

    def __init__(self , img_width , img_height , epochs , batch_size):
        """Store the training configuration.

        Args:
            img_width: Width, in pixels, images are resized to.
            img_height: Height, in pixels, images are resized to.
            epochs: Number of training epochs.
            batch_size: Number of images per batch for both generators.
        """
        self.img_width = img_width
        self.img_height = img_height
        self.epochs = epochs
        self.batch_size = batch_size

    def _steps_for(self, n_samples):
        """Return how many batches of ``self.batch_size`` cover ``n_samples`` once."""
        # Ceiling division without importing math.
        return -(-n_samples // self.batch_size)

    def LENET(self):
        """Build, train and save the baseline model.

        The architecture is almost the same as LeNet with some small
        modifications and is meant to be used as a baseline only. Images are
        read from ``train_dir`` and ``val_dir``, rescaled to [0, 1], and the
        trained weights are written to ``fc_top_model.h5``.
        """
        # The network takes single-channel input, so the generators below
        # must decode images as grayscale as well.
        channels = 1

        # image_data_format is a function and the value is spelled
        # 'channels_first'; Keras image shapes are ordered (height, width).
        if K.image_data_format() == 'channels_first':
            input_shape = (channels , self.img_height , self.img_width)
        else:
            input_shape = (self.img_height , self.img_width , channels)

        self.model = Sequential(name="LENet")

        self.model.add(Conv2D(filters=32 , kernel_size=(3, 3) , input_shape=input_shape))
        self.model.add(Activation('relu'))
        self.model.add(MaxPooling2D(pool_size=(2, 2) , strides=(2, 2)))

        # NOTE(review): 62 filters looks like a typo for 64; kept unchanged
        # so the architecture stays the same - confirm with the author.
        self.model.add(Conv2D(filters=62 , kernel_size=(3, 3)))
        self.model.add(Activation('relu'))
        self.model.add(MaxPooling2D(pool_size=(2, 2) , strides=(2, 2)))

        self.model.add(Conv2D(filters=128 , kernel_size=(5, 5)))
        self.model.add(Activation('relu'))
        self.model.add(MaxPooling2D(pool_size=(2, 2) , strides=(2, 2)))

        self.model.add(Flatten())
        self.model.add(Dense(100))
        self.model.add(Activation('relu'))
        self.model.add(Dense(16))
        self.model.add(Activation('softmax'))

        self.model.compile(loss=keras.losses.categorical_crossentropy ,
                           optimizer='adam' ,
                           metrics=['accuracy'])
        # summary() prints by itself and returns None, so it is not wrapped
        # in print().
        self.model.summary()

        datagen = ImageDataGenerator(rescale=1./255)

        # Create the train and validation generators: images are fed from
        # the train and validation directories and rescaled.
        print('Feeding the train and val data.........')
        target_size = (self.img_height , self.img_width)
        train_generator = datagen.flow_from_directory(
            train_dir,
            target_size=target_size,
            color_mode='grayscale',
            batch_size=self.batch_size,
            class_mode='categorical')

        validation_generator = datagen.flow_from_directory(
            val_dir,
            target_size=target_size,
            color_mode='grayscale',
            batch_size=self.batch_size,
            class_mode='categorical')

        print('Training Starting.....')
        # Steps are counted in batches, not samples; derive them from the
        # number of images each generator actually found.
        self.model.fit_generator(
            train_generator,
            steps_per_epoch=self._steps_for(train_generator.samples),
            epochs=self.epochs,
            validation_data=validation_generator,
            validation_steps=self._steps_for(validation_generator.samples))

        self.model.save_weights('fc_top_model.h5')
        print('weights saved...')

我的数据共有 16 个类别,图像放在 train 文件夹下的 16 个子文件夹中,每个类别对应一个子文件夹。测试集和验证集也按同样的方式组织。但是当我运行代码时,model.fit_generator() 这一行会抛出错误。输出如下:

_________________________________________________________________
activation_68 (Activation)   (None, 122, 122, 128)     0         
_________________________________________________________________
max_pooling2d_42 (MaxPooling (None, 61, 61, 128)       0         
_________________________________________________________________
flatten_14 (Flatten)         (None, 476288)            0         
_________________________________________________________________
dense_27 (Dense)             (None, 100)               47628900  
_________________________________________________________________
activation_69 (Activation)   (None, 100)               0         
_________________________________________________________________
dense_28 (Dense)             (None, 16)                1616      
_________________________________________________________________
activation_70 (Activation)   (None, 16)                0         
=================================================================
Total params: 47,847,282
Trainable params: 47,847,282
Non-trainable params: 0
_________________________________________________________________
None
Feeding the train and val data.........
Found 319835 images belonging to 16 classes.
Found 39995 images belonging to 16 classes.
Training Starting.....
Epoch 1/1
---------------------------------------------------------------------------
OSError                                   Traceback (most recent call last)
<ipython-input-45-0229da3ad25e> in <module>
----> 1 baseline.LENET()

错误:

et_size, interpolation)
    117     elif color_mode == 'rgb':
    118         if img.mode != 'RGB':
--> 119             img = img.convert('RGB')
    120     else:
    121         raise ValueError('color_mode must be "grayscale", "rgb", or "rgba"')

~\Miniconda3\envs\tensorflow\lib\site-packages\PIL\Image.py in convert(self, mode, matrix, dither, palette, colors)
    928         """
    929 
--> 930         self.load()
    931 
    932         if not mode and self.mode == "P":

~\Miniconda3\envs\tensorflow\lib\site-packages\PIL\TiffImagePlugin.py in load(self)
   1095     def load(self):
   1096         if self.use_load_libtiff:
-> 1097             return self._load_libtiff()
   1098         return super(TiffImageFile, self).load()
   1099 

~\Miniconda3\envs\tensorflow\lib\site-packages\PIL\TiffImagePlugin.py in _load_libtiff(self)
   1207 
   1208         if err < 0:
-> 1209             raise IOError(err)
   1210 
   1211         return Image.Image.load(self)

OSError: -2

请告诉我这里有什么问题。我已经搜索了整个互联网但无法解决这个问题:(

PS:数据集是 RVL-CDIP 数据集。当我使用猫和狗数据集时,上面的代码运行良好。请帮助我。

标签: image-processing, keras, deep-learning, conv-neural-network

解决方案


我也有同样的问题,我通过将图像从 TIF 格式转换为 JPEG 格式解决了这个问题。

我系统上的文件夹结构是:

在此处输入图像描述

import os
import cv2

Base_Dir = 'Path-to-Data-Folder'

# Convert every TIFF found in Base_Dir/<Folder>/<Sub_Folder>/ to a JPEG stored
# in a sibling directory named <Sub_Folder>_JPEG.
for Folder in sorted(os.listdir(Base_Dir)):
    Folder_Path = os.path.join(Base_Dir, Folder)
    if not os.path.isdir(Folder_Path):
        continue
    for Sub_Folder in sorted(os.listdir(Folder_Path)):
        Src_Dir = os.path.join(Folder_Path, Sub_Folder)
        # Skip stray files and the output folders left by an earlier run, so
        # re-running the script does not create <Sub_Folder>_JPEG_JPEG.
        if not os.path.isdir(Src_Dir) or Sub_Folder.endswith('_JPEG'):
            continue
        Dst_Dir = Src_Dir + '_JPEG'
        os.makedirs(Dst_Dir, exist_ok=True)
        for File in sorted(os.listdir(Src_Dir)):
            Stem, Ext = os.path.splitext(File)
            # Accept .tif and .tiff in any letter case.
            if Ext.lower() not in ('.tif', '.tiff'):
                continue
            Src_File = os.path.join(Src_Dir, File)
            Im = cv2.imread(Src_File)
            # cv2.imread returns None instead of raising when decoding fails.
            if Im is None:
                print('Skipping unreadable image: ' + Src_File)
                continue
            Out_File = os.path.join(Dst_Dir, Stem + '.jpeg')
            if not cv2.imwrite(Out_File, Im):
                print('Could not write: ' + Out_File)

另外,你能告诉我你从哪里得到完整的数据集吗?我只能从 Kaggle 访问其中的一部分。


推荐阅读