首页 > 解决方案 > 使用带有 keras 生成器的 sqlite

问题描述

我正在尝试使用 Keras 生成器分批训练 CNN。我的数据存放在 SQLite 数据库中。由于 SQLite 连接不能在多线程代码中共享,所以每次需要导入一个批次时,我都会重新打开一个数据库连接。但当我随后执行一条简单的 SQL 查询时(同样的查询在单独的简单脚本中执行没有任何错误),却收到了如下错误:

File "<ipython-input-4-6c84648166ec>", line 1, in <module>
    db_cursor.execute(sql_query)
TypeError: convert_array() takes 1 positional argument but 2 were given

我的代码:

import numpy as np
import keras
import sqlite3
import io

class DataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` that reads training batches from a SQLite database.

    Each batch is fetched by row index (``ind``) from the database at
    ``path`` and concatenated with the output of ``self.augment_data``.

    NOTE(review): ``augment_data`` is called in ``__getitem__`` but is not
    defined in the visible part of this class.
    """
    def __init__(self, path, num_features, end_index, start_index=1,    augmentation_ratio=2, batch_size=250,
                 shuffle=True, exclude = []):
        """Store the configuration and build the list of sample indices.

        Args:
            path: Database location handed to ``sqlite3.connect``.
            num_features: Stored as ``self.N``; used as the second dimension
                of the per-batch feature array.
            end_index: Upper bound of the ``ind`` values to use (exclusive in
                both branches below).
            start_index: Lower bound of the ``ind`` values. Inclusive when
                ``exclude`` is empty (``np.arange``), but exclusive when it
                is not (``ind > ?``).
            augmentation_ratio: ``batch_size`` is divided by this value to
                get the number of rows read from the database per batch.
            batch_size: Nominal batch size; converted to ``int``.
            shuffle: If true, the sample indices are shuffled in
                ``on_epoch_end``.
            exclude: Class labels whose rows are left out of the index list.
                NOTE(review): mutable default argument; it is not mutated in
                the visible code.
        """
        # Module-level sqlite3 registration. Bound methods are passed here,
        # so sqlite3 will invoke them as self.adapt_array(value) /
        # self.convert_array(value).
        sqlite3.register_adapter(np.ndarray, self.adapt_array)
        sqlite3.register_converter("array", self.convert_array)
        self.path = path
        # PARSE_DECLTYPES makes sqlite3 choose converters from the declared
        # column types.
        self.db = sqlite3.connect(self.path, detect_types=sqlite3.PARSE_DECLTYPES, check_same_thread=False)
        self.db_cursor = self.db.cursor()
        self.exclude = exclude
        self.N = num_features
        self.start_index = start_index
        self.end_index = end_index
        self.augmentation_ratio = augmentation_ratio
        self.batch_size = int(batch_size)
        self.shuffle = shuffle
        # Number of rows actually read from the database for one batch.
        self.import_size = int(np.floor(self.batch_size / self.augmentation_ratio))
        if exclude.__len__() == 0:
            # No filtering: 1-D array of consecutive indices.
            self.sample_index = np.arange(self.start_index, self.end_index)
        else:
            # The excluded labels are formatted directly into the SQL text;
            # only the index bounds are bound as parameters.
            # NOTE(review): TABLE is presumably a placeholder for the real
            # table name - confirm.
            query = 'SELECT ind FROM TABLE WHERE ind > ? AND ind < ? AND class NOT IN ({})'. \
                format(','.join(str(label) for label in self.exclude))
            self.db_cursor.execute(query, [self.start_index, self.end_index])
            # fetchall() returns a list of rows, so this array presumably has
            # one row per sample (shape (k, 1)) - TODO confirm.
            self.sample_index = np.asarray(self.db_cursor.fetchall())
        self.n_samples = self.sample_index.shape[0]
        self.on_epoch_end()
        # The connection is closed here; self.db and self.db_cursor remain as
        # attributes but refer to the closed connection.
        self.db.close()




    def __len__(self):
        """Return the number of batches per epoch.

        This is ``n_samples / import_size`` rounded down.
        """
        return int(np.floor(self.n_samples / self.import_size))

    def __getitem__(self, index):
        """Build the batch for ``index``.

        Returns:
            A tuple ``(samples, y_mat)``: the fetched features concatenated
            with ``self.augment_data`` applied to them, and the labels
            concatenated with themselves.
        """
        # Positions into self.sample_index for this batch.
        # NOTE(review): the range starts at (index - 1) * import_size, so
        # index 0 yields negative positions, which NumPy indexing counts
        # from the end of the array.
        samples_batch = np.arange((index - 1) * self.import_size, index * self.import_size)

        # Fetch features and one-hot labels for these positions.
        x, y = self.__data_generation(samples_batch)

        augmented_x = self.augment_data(x)

        samples = np.concatenate((x, augmented_x))
        # Labels are duplicated once - assumes augment_data returns as many
        # samples as it was given; TODO confirm.
        y_mat = np.concatenate((y, y))
        # return X_reshaped, xData_complete
        return  samples, y_mat

    def on_epoch_end(self):
        """Shuffle ``self.sample_index`` in place when ``self.shuffle`` is true."""
        if self.shuffle:
            np.random.shuffle(self.sample_index)

    def __data_generation(self, samples_batch):
        """Fetch ``import_size`` rows from the database for one batch.

        Args:
            samples_batch: Positions into ``self.sample_index``.

        Returns:
            A tuple ``(x, y)``: ``x`` has shape ``(import_size, N, 1)`` and
            ``y`` is the result of ``keras.utils.to_categorical`` applied to
            the fetched class values.
        """
        # Pre-allocated output buffers.
        x = np.empty((self.import_size, self.N, 1))
        y = np.empty((self.import_size, 1))
        inds = self.sample_index[samples_batch]
        # The adapter and converter are registered again and a new connection
        # is opened on every call.
        sqlite3.register_adapter(np.ndarray, self.adapt_array)
        sqlite3.register_converter("array", self.convert_array)
        # NOTE(review): this connection is never closed in the visible code.
        db = sqlite3.connect(self.path, detect_types=sqlite3.PARSE_DECLTYPES)
        db_cursor = db.cursor()
        # ind[0] expects each entry of inds to be indexable, which matches
        # the fetchall() branch of __init__.
        # NOTE(review): with the np.arange branch the entries are scalars -
        # verify.
        sql_query = "SELECT class,features FROM TABLE WHERE ind in ({})".\
            format(','.join(str(ind[0]) for ind in inds))
        db_cursor.execute(sql_query)
        # One fetched row per output slot: column 0 is the class, column 1
        # the features.
        for i in range(self.import_size):
            line = db_cursor.fetchone()
            y[i] = line[0]
            x[i, :] = line[1]
        y = keras.utils.to_categorical(y)
        return x, y

    def adapt_array(arr):
        """Serialize ``arr`` with ``np.save`` and wrap the bytes in ``sqlite3.Binary``.

        NOTE(review): declared without ``self`` and without ``@staticmethod``,
        yet registered above as the bound method ``self.adapt_array``.
        """
        out = io.BytesIO()
        np.save(out, arr)
        out.seek(0)
        return sqlite3.Binary(out.read())

    def convert_array(text):
        """Load an array with ``np.load`` from the bytes in ``text``.

        NOTE(review): declared without ``self`` and without ``@staticmethod``,
        yet registered above as the bound method ``self.convert_array``, so
        sqlite3 calls it with the instance plus the value.
        """
        out = io.BytesIO(text)
        out.seek(0)
        return np.load(out)

我究竟做错了什么?

标签: python sqlite keras

解决方案


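您的 adapt_array 和 convert_array 方法缺少 self 参数。由于它们是以绑定方法(self.adapt_array、self.convert_array)的形式注册的,调用时实例会作为第一个参数自动传入,所以才会出现 “takes 1 positional argument but 2 were given” 的错误。以 convert_array 为例(adapt_array 同理):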

def convert_array(self, text):
    """Deserialize a NumPy array from raw bytes.

    Args:
        text: Bytes as produced by ``np.save`` (the value sqlite3 hands to
            a registered converter).

    Returns:
        Whatever ``np.load`` reads from those bytes.
    """
    # A BytesIO created from initial bytes is already positioned at offset
    # 0, so it can be handed to np.load directly.
    return np.load(io.BytesIO(text))

或者,您可以使用@staticmethod装饰器:

@staticmethod
def convert_array(text):
    out = io.BytesIO(text)
    out.seek(0)
    return np.load(out)

推荐阅读