python - 损失从不低于 0.35
问题描述
我正在构建模型来对图像序列进行分类。我尝试了不同的模型,但我的训练或验证损失无法低于 0.35。我能得到的最好结果是这样的: Train_Valid_Loss
在训练一个普通的 CNN 分类器时,我习惯于损失值低于 0.1。我尝试了几件事来改进,包括:
- 改变网络复杂性
- 不同的优化器
- 改变辍学
- 图像增强
- 实现学习率调度器
我仍然无法获得低于该点的损失。我正在使用 JAAD 数据库并想对人们的行为(走路或站立)进行分类。我正在使用最后 10 帧来获取功能。我首先尝试了一个 CNN-LSTM,但由于我无法获得低于 0.35 的损失,我尝试了一种不同的方法并为每个人提取了姿势。这样我就可以摆脱 cnn 并使用以关键点为特征的普通 lstm 网络。但是,我仍然无法得到低于 0.35 的损失。我怎样才能改善损失?我在 Pytorch 中使用 nn.BCELoss。我开始认为,代码中存在错误,结果不是因为数据集。代码中最重要的部分如下:
采样器和数据加载器:
class MySampler(torch.utils.data.Sampler):
def __init__(self, end_idx, seq_length):
indices = []
for i in range(len(end_idx)-1):
start = end_idx[i]
end = end_idx[i+1] - seq_length
indices.append(torch.arange(start, end))
indices = torch.cat(indices)
self.indices = indices
def __iter__(self):
indices = self.indices[torch.randperm(len(self.indices))]
return iter(indices.tolist())
def __len__(self):
return len(self.indices)
class MyDataset(Dataset):
def __init__(self, image_paths, seq_length, length,batch):
self.image_paths = image_paths
self.seq_length = seq_length
self.length = length
self.batch=batch
def __getitem__(self, index):
start = index
end = index + self.seq_length
#print('Getting images from {} to {}'.format(start, end))
indices = list(range(start, end))
#print(indices)
images = []
for i in indices:
image_path = self.image_paths[i][0]
image = np.load(image_path)
image=torch.from_numpy(image)
images.append(image)
x = torch.stack(images)
y = torch.tensor([self.batch,self.image_paths[start][1]], dtype=torch.long)
return x, y
def __len__(self):
return self.length
BATCH=1
root_dir = 'path tofolder...\\Dataset\\JAAD\\Pose\\dataset\\Train\\'
class_paths = [d.path for d in os.scandir(root_dir) if d.is_dir]
class_image_paths = []
end_idx = []
for c, class_path in enumerate(class_paths):
for d in os.scandir(class_path):
if d.is_dir:
paths = sorted(glob.glob(os.path.join(d.path, '*.npy')))
# Add class idx to paths
paths = [(p, c) for p in paths]
class_image_paths.extend(paths)
end_idx.extend([len(paths)])
end_idx = [0, *end_idx]
end_idx = torch.cumsum(torch.tensor(end_idx), 0)
seq_length = 6
sampler = MySampler(end_idx, seq_length)
dataset_train = MyDataset(image_paths=class_image_paths,seq_length=seq_length,length=len(sampler),batch=BATCH)
loader_train = DataLoader(dataset_train,batch_size=BATCH,sampler=sampler)
no_stand=0
no_walk=0
lab_list=[]
for data, target in tqdm(loader_train):
lab=int(target[0][1])
lab_list.append(lab)
if lab ==0:
no_stand+=1
elif lab==1:
no_walk+=1
arr = np.array(lab_list)
uni=np.unique(arr)
print(uni)
print('stand: '+ str(no_stand)+ '\t walk: '+ str(no_walk))
模型架构:
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.lstm = nn.LSTM(input_size=31,hidden_size=20, num_layers=2,dropout=0.5,batch_first=True).cuda()
self.lstm2 = nn.LSTM(input_size=20,hidden_size=10, num_layers=2,dropout=0.5,batch_first=True).cuda()#,hidden_size =self.hidden_size)
self.fc = nn.Linear(60, 2)
nn.init.xavier_uniform_(self.fc.weight)
self.self_attn = torch.nn.MultiheadAttention(embed_dim=20, num_heads=2)
def forward(self, x):
torch.nan_to_num(x, nan=-1)
x=x.view(BATCH,6, x.shape[2]))
x, hidden = self.lstm(x)
x, weight = self.self_attn(query=x, key=x, value=x)
x, hidden = self.lstm2(x)
x = x.contiguous().view(x.shape[0], x.shape[1]*x.shape[2])
x = self.fc(x)
return x
train_on_gpu = torch.cuda.is_available()
model=Net()
if not train_on_gpu:
print('CUDA is not available. Training on CPU ...')
else:
print('CUDA is available! Training on GPU ...')
if train_on_gpu:
model.cuda()
优化器和损失:
optimizer= optim.Adadelta(model.parameters(), lr=0.01, rho=0.9, eps=1e-06, weight_decay=0.01)
criterion=nn.BCEWithLogitsLoss()
scheduler=ReduceLROnPlateau(optimizer, mode='max', factor=0.7, patience=3,verbose=True)
火车循环:
n_epochs = 50
NEW_INI = True
loss_values_train=[]
loss_values_valid=[]
if NEW_INI:
valid_loss_min = np.Inf
NEW_INI = False
print(train_on_gpu)
for epoch in range(1, n_epochs+1):
train_loss = 0.0
valid_loss = 0.0
###################
# train the model #
###################
model.train()
for data, target in tqdm(loader_train):
if train_on_gpu:
data, target = data.cuda(), target.cuda()
optimizer.zero_grad()
output = model(data.float())
#print(data.float())
#print(output)
loss = criterion(output, target.float().cuda())
#print(loss)
loss.backward()
torch.nn.utils.clip_grad_value_(model.parameters(), 1)
torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
optimizer.step()
train_loss += loss.item()*data.size(0)
######################
# validate the model #
######################
model.eval()
for data, target in loader_valid:
if train_on_gpu:
data, target = data.cuda(), target.cuda()
output=model(data.float())
loss = criterion(output, target.float().cuda())
valid_loss += loss.item()*data.size(0)
train_loss = train_loss/len(loader_train.sampler)
valid_loss = valid_loss/len(loader_valid.sampler)
loss_values_train.append(train_loss)
loss_values_valid.append(valid_loss)
#train_loss = train_loss/len(train_loader.sampler)
#print('Epoch: {} \tTraining Loss: {:.6f}'.format(epoch, train_loss))
print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f} \tlr: {:.6f}'.format(epoch, train_loss, valid_loss,optimizer.param_groups[0]['lr']))
if valid_loss <= valid_loss_min:
print('Validation loss decreased ({:.6f} --> {:.6f}). Saving model ...'.format(valid_loss_min,valid_loss))
torch.save(model.state_dict(), 'model_pose-v'+str(VERSION)+'.pt')
valid_loss_min = valid_loss
#print('Epoch-{0} lr: {1}'.format(epoch, optimizer.param_groups[0]['lr']))
scheduler.step(valid_loss)
准确度为 47%。你能发现任何可能导致这个问题的东西吗?谢谢 :)
解决方案
推荐阅读
- mysql - 如何在数据库中实现列表
- java - 从 BST 打印节点
- vue.js - 获取商店模块列表
- python - 使用 hasicorp Vault 循环逻辑在值中破坏 ansible k/v 变量
- node.js - Async & Await 的缺点 - NodeJs
- oracle10g - 如何获取之前在 Oracle 10G Express Edition 中运行的命令行 SQL 的所有代码?
- typescript - 使用标签泛型创建接口外的联合
- python-3.x - 从标准偏差拉伸带创建新栅格(.tif),与 dstack 一起使用,但不编写新文件,Python
- php - 如何将数组作为参数绑定到具有未知数量变量(程序)的准备好的语句?
- javascript - 将不同页面的 javascript 文件捆绑在一起时出错