首页 > 解决方案 > 如何在保持初始数据集不变的同时,在每个训练轮次(epoch)对训练数据集进行打乱?

问题描述

感知器

我想比较在每个轮次(epoch)打乱训练数据集与不打乱训练数据集这两种情况下的训练准确率。不打乱训练数据集的部分我已经完成,但我不知道如何把打乱数据的逻辑加入现有代码,以便绘制图表,直观地比较打乱与不打乱时训练准确率的差异。

class Perceptron():
    """Binary perceptron trained online, one sample at a time.

    Predictions are produced with ``np.sign`` and exact ties are broken by
    drawing from ``[-1, 1]``, so labels are presumably encoded as -1 / +1.
    ``averaged`` is stored on the instance but is not used by any method
    of this class.
    """

    def __init__(self, num_epochs, num_features, averaged):
        super().__init__()
        self.num_epochs = num_epochs
        self.averaged = averaged
        self.num_features = num_features
        self.weights = None
        self.bias = None

    def init_parameters(self):
        """Reset the weights to a zero vector and the bias to zero."""
        self.weights = np.zeros(self.num_features)
        self.bias = 0

    def _activation(self, x):
        """Return the raw score for one sample, breaking exact ties at random."""
        score = safe_sparse_dot(self.weights, x.T, dense_output=True) + self.bias
        if score == 0:
            # A score of exactly zero carries no sign; pick -1 or +1 at random.
            return np.array(sample([-1, 1], 1)).astype('float64')
        return score

    def train(self, train_X, train_y, dev_X, dev_y):
        """Train for ``num_epochs`` passes over the data in its given order.

        Args:
            train_X: training samples, indexable by row and exposing ``shape``.
            train_y: training labels, one per row of ``train_X``.
            dev_X: development samples, iterable row by row.
            dev_y: development labels, one per row of ``dev_X``.

        Returns:
            ``(train_acc, dev_acc)``: two lists with one accuracy per epoch.
            Training accuracy is measured on the predictions made during the
            epoch (before each update); development accuracy is measured
            with the weights at the end of the epoch.
        """
        self.init_parameters()

        train_acc = []
        dev_acc = []
        # Use the real number of rows instead of a hard-coded dataset size.
        num_samples = train_X.shape[0]

        for _ in range(self.num_epochs):
            preds = []

            for i in range(num_samples):
                a = self._activation(train_X[i])
                preds.append(np.sign(a))
                # Misclassified (or on the boundary): perceptron update.
                if (train_y[i] * a) <= 0:
                    self.weights = self.weights + train_y[i] * train_X[i]
                    self.bias = self.bias + train_y[i]

            train_acc.append(np.mean(np.array(preds).ravel() == train_y))
            dev_acc.append(np.mean(self.predict(dev_X) == dev_y))

        return train_acc, dev_acc

    def predict(self, X):
        """Return a flat array with the sign of the score for every row of X."""
        predicted_labels = [np.sign(self._activation(row)) for row in X]
        return np.array(predicted_labels).ravel()

标签: python, shuffle

解决方案


与其直接打乱数据本身,不如创建一个索引数组,并在每个轮次(epoch)打乱该索引数组。这样原始数据的顺序就能保持不变。

# Shuffle an index array rather than the data, so train_X / train_y keep their order.
idx = np.arange(train_X.shape[0])
np.random.shuffle(idx)  # shuffles idx in place
train_X_shuffled = train_X[idx]
train_y_shuffled = train_y[idx]

将其加入您的代码中(保留原始数据不变,并尽可能少地改动现有代码):

class Perceptron():
    """Binary perceptron trained online, optionally reshuffling every epoch.

    Predictions are produced with ``np.sign`` and exact ties are broken by
    drawing from ``[-1, 1]``, so labels are presumably encoded as -1 / +1.
    ``averaged`` is stored on the instance but is not used by any method
    of this class.
    """

    def __init__(self, num_epochs, num_features, averaged):
        super().__init__()
        self.num_epochs = num_epochs
        self.averaged = averaged
        self.num_features = num_features
        self.weights = None
        self.bias = None

    def init_parameters(self):
        """Reset the weights to a zero vector and the bias to zero."""
        self.weights = np.zeros(self.num_features)
        self.bias = 0

    def _activation(self, x):
        """Return the raw score for one sample, breaking exact ties at random."""
        score = safe_sparse_dot(self.weights, x.T, dense_output=True) + self.bias
        if score == 0:
            # A score of exactly zero carries no sign; pick -1 or +1 at random.
            return np.array(sample([-1, 1], 1)).astype('float64')
        return score

    def train(self, train_X, train_y, dev_X, dev_y, shuffle=True):
        """Train for ``num_epochs`` passes, visiting samples in a fresh order.

        The caller's ``train_X`` / ``train_y`` are never modified: only an
        index array is shuffled, and each epoch works on index-selected
        views of the data.

        Args:
            train_X: training samples, indexable by row and by an index
                array, and exposing ``shape``.
            train_y: training labels, indexable by an index array.
            dev_X: development samples, iterable row by row.
            dev_y: development labels, one per row of ``dev_X``.
            shuffle: when True (default) the sample order is reshuffled at
                the start of every epoch; when False the original order is
                used, which allows a shuffled vs. unshuffled comparison.

        Returns:
            ``(train_acc, dev_acc)``: two lists with one accuracy per epoch.
        """
        self.init_parameters()

        train_acc = []
        dev_acc = []
        # Use the real number of rows instead of a hard-coded dataset size.
        num_samples = train_X.shape[0]
        order = np.arange(num_samples)

        for _ in range(self.num_epochs):
            if shuffle:
                # np.random.shuffle permutes the index array in place.
                np.random.shuffle(order)
                epoch_X = train_X[order]
                epoch_y = train_y[order]
            else:
                epoch_X, epoch_y = train_X, train_y

            preds = []

            for i in range(num_samples):
                a = self._activation(epoch_X[i])
                preds.append(np.sign(a))
                # Misclassified (or on the boundary): perceptron update.
                if (epoch_y[i] * a) <= 0:
                    self.weights = self.weights + epoch_y[i] * epoch_X[i]
                    self.bias = self.bias + epoch_y[i]

            # Compare against the labels in the order used this epoch.
            train_acc.append(np.mean(np.array(preds).ravel() == epoch_y))
            dev_acc.append(np.mean(self.predict(dev_X) == dev_y))

        return train_acc, dev_acc

    def predict(self, X):
        """Return a flat array with the sign of the score for every row of X."""
        predicted_labels = [np.sign(self._activation(row)) for row in X]
        return np.array(predicted_labels).ravel()

推荐阅读