python - 如何在保持初始数据集不变的同时,在每个 epoch(训练轮次)对训练数据集进行打乱?
问题描述
感知器
我想比较"每个 epoch 都打乱训练数据集"与"不打乱训练数据集"这两种情况下的训练准确率。不打乱训练数据集的部分我已经完成,但我不知道如何把打乱操作加入现有代码,以便绘制图表,直观地比较打乱与不打乱时训练准确率的差异。
class Perceptron():
    """Perceptron classifier trained with the mistake-driven update rule.

    Predictions are produced with ``np.sign``, so labels are expected to be
    -1 / +1.  Samples are visited in the order they appear in ``train_X``.

    NOTE(review): ``safe_sparse_dot`` and ``sample`` must already be in scope
    (presumably ``sklearn.utils.extmath.safe_sparse_dot`` and
    ``random.sample``) -- confirm against the file's imports.
    """

    def __init__(self, num_epochs, num_features, averaged):
        """Store the hyper-parameters; weights are created by init_parameters.

        Args:
            num_epochs: number of passes over the training data.
            num_features: length of the weight vector.
            averaged: stored on the instance; not used by the code in this
                class.
        """
        super().__init__()
        self.num_epochs = num_epochs
        self.averaged = averaged
        self.num_features = num_features
        self.weights = None
        self.bias = None

    def init_parameters(self):
        """Reset the weights to a zero vector and the bias to 0."""
        self.weights = np.zeros(self.num_features)
        self.bias = 0

    def _activation(self, x):
        """Return the scalar activation ``w . x + b`` for one sample.

        An activation of exactly 0 is replaced by a random -1.0 / +1.0 so
        that ``np.sign`` never yields the non-label value 0.
        """
        score = safe_sparse_dot(self.weights, x.T, dense_output=True) + self.bias
        # The dot product may come back as a 0-d, (1,) or (1, 1) object
        # depending on the input type; reduce it to a plain scalar so every
        # prediction collected by the callers has the same shape.
        score = np.asarray(score).item()
        if score == 0:
            score = float(sample([-1, 1], 1)[0])
        return score

    def train(self, train_X, train_y, dev_X, dev_y):
        """Train for ``num_epochs`` passes and track accuracy per epoch.

        Args:
            train_X: training samples, indexable by row, with ``shape[0]``
                giving the number of samples.
            train_y: training labels aligned with ``train_X``.
            dev_X: development samples passed to ``predict``.
            dev_y: development labels aligned with ``dev_X``.

        Returns:
            A ``(train_acc, dev_acc)`` tuple of lists holding one accuracy
            value per epoch.  The training accuracy is measured online, i.e.
            from the predictions made while the weights are being updated.
        """
        self.init_parameters()
        train_acc = []
        dev_acc = []
        # Take the sample count from the data instead of hard-coding it.
        num_samples = train_X.shape[0]
        for epoch in range(self.num_epochs):
            preds = []
            for i in range(num_samples):
                a = self._activation(train_X[i])
                preds.append(np.sign(a))
                # Misclassified (or on the boundary): move towards the label.
                if (train_y[i] * a) <= 0:
                    self.weights = self.weights + train_y[i] * train_X[i]
                    self.bias = self.bias + train_y[i]
            training_acc = np.mean(np.array(preds).ravel() == train_y)
            train_acc.append(training_acc)
            development_acc = np.mean(self.predict(dev_X) == dev_y)
            dev_acc.append(development_acc)
        return train_acc, dev_acc

    def predict(self, X):
        """Return a flat array with the predicted sign for every row of X."""
        predicted_labels = [np.sign(self._activation(row)) for row in X]
        return np.array(predicted_labels).ravel()
解决方案
与其直接打乱数据本身,不如创建一个索引数组,在每个 epoch 打乱这个索引数组,再用它来取数据。这样原始数据的顺序就保持不变。
# Shuffle an array of row indices rather than the data itself, so that
# train_X / train_y keep their original order.
idx = np.arange(train_X.shape[0])
np.random.shuffle(idx)  # in-place shuffle of the index array
train_X_shuffled = train_X[idx]
train_y_shuffled = train_y[idx]
将此添加到您的代码中(保留原始数据不变,每个 epoch 用打乱后的索引生成一份重排后的副本,因此尽可能少地更改代码):
class Perceptron():
    """Perceptron classifier that reshuffles the training data every epoch.

    Predictions are produced with ``np.sign``, so labels are expected to be
    -1 / +1.  The caller's ``train_X`` / ``train_y`` are never reordered:
    each epoch works on a freshly indexed, shuffled view of them.

    NOTE(review): ``safe_sparse_dot`` and ``sample`` must already be in scope
    (presumably ``sklearn.utils.extmath.safe_sparse_dot`` and
    ``random.sample``) -- confirm against the file's imports.
    """

    def __init__(self, num_epochs, num_features, averaged):
        """Store the hyper-parameters; weights are created by init_parameters.

        Args:
            num_epochs: number of passes over the training data.
            num_features: length of the weight vector.
            averaged: stored on the instance; not used by the code in this
                class.
        """
        super().__init__()
        self.num_epochs = num_epochs
        self.averaged = averaged
        self.num_features = num_features
        self.weights = None
        self.bias = None

    def init_parameters(self):
        """Reset the weights to a zero vector and the bias to 0."""
        self.weights = np.zeros(self.num_features)
        self.bias = 0

    def _activation(self, x):
        """Return the scalar activation ``w . x + b`` for one sample.

        An activation of exactly 0 is replaced by a random -1.0 / +1.0 so
        that ``np.sign`` never yields the non-label value 0.
        """
        score = safe_sparse_dot(self.weights, x.T, dense_output=True) + self.bias
        # The dot product may come back as a 0-d, (1,) or (1, 1) object
        # depending on the input type; reduce it to a plain scalar so every
        # prediction collected by the callers has the same shape.
        score = np.asarray(score).item()
        if score == 0:
            score = float(sample([-1, 1], 1)[0])
        return score

    def train(self, train_X, train_y, dev_X, dev_y):
        """Train for ``num_epochs`` passes, shuffling the samples each epoch.

        Args:
            train_X: training samples; must support ``shape[0]`` and indexing
                with an integer index array.
            train_y: training labels aligned with ``train_X``; must support
                indexing with an integer index array.
            dev_X: development samples passed to ``predict``.
            dev_y: development labels aligned with ``dev_X``.

        Returns:
            A ``(train_acc, dev_acc)`` tuple of lists holding one accuracy
            value per epoch.  The training accuracy is measured online, i.e.
            from the predictions made while the weights are being updated.
        """
        self.init_parameters()
        train_acc = []
        dev_acc = []
        # Take the sample count from the data instead of hard-coding it.
        num_samples = train_X.shape[0]
        idx = np.arange(num_samples)
        for epoch in range(self.num_epochs):
            # Shuffle the indices in place, then index the untouched
            # originals.  Indexing with an index array yields new objects,
            # so no up-front copy of the data is needed.
            np.random.shuffle(idx)
            epoch_X = train_X[idx]
            epoch_y = train_y[idx]
            preds = []
            for i in range(num_samples):
                a = self._activation(epoch_X[i])
                preds.append(np.sign(a))
                # Misclassified (or on the boundary): move towards the label.
                if (epoch_y[i] * a) <= 0:
                    self.weights = self.weights + epoch_y[i] * epoch_X[i]
                    self.bias = self.bias + epoch_y[i]
            # Compare against the labels in this epoch's shuffled order.
            training_acc = np.mean(np.array(preds).ravel() == epoch_y)
            train_acc.append(training_acc)
            development_acc = np.mean(self.predict(dev_X) == dev_y)
            dev_acc.append(development_acc)
        return train_acc, dev_acc

    def predict(self, X):
        """Return a flat array with the predicted sign for every row of X."""
        predicted_labels = [np.sign(self._activation(row)) for row in X]
        return np.array(predicted_labels).ravel()
推荐阅读
- assembly - X86:如何将xmm0的下半部分设置为0,而不影响上半部分?
- python - Redisearch 前缀搜索始终返回最大 200 的多字段索引总数
- c - CS50 PSET4 拼写器无法释放内存
- c# - 在 C# 中保持状态标志(32 项以下)的最有效方法
- amazon-dynamodb - 使用 Flink 在 DynamoDB 表中聚合电影租赁信息
- r - 更新重复行中的单个值
- c++ - 如何在 Visual Studio C++ 中将年、月、日、时间转换为 UNIX 时间
- android - 如何从 PlacesAutocomplete 中的位置中选择值并将其存储在 TextFormField 中
- java - 给定一个双调数组,我们需要找到峰值元素
- javascript - 获取异步 TreeView 数据