Loss never goes below 0.35

Problem description

I am building a model to classify sequences of images. I have tried different models, but neither my training loss nor my validation loss will drop below 0.35. The best result I can get looks like this: [Train_Valid_Loss plot]

When training a plain CNN classifier I am used to loss values below 0.1, and I have tried several things to improve this one.

I still cannot get the loss below that point. I am using the JAAD dataset and want to classify people's behaviour (walking or standing). I use the last 10 frames to extract features. I first tried a CNN-LSTM, but since I could not get the loss below 0.35, I tried a different approach and extracted each person's pose. That way I could drop the CNN and use a plain LSTM network with the keypoints as features. However, I still cannot get the loss below 0.35. How can I improve it? I am using nn.BCELoss in PyTorch. I am starting to think there is a bug in the code and that the result is not down to the dataset. The most important parts of the code are as follows:
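
One detail worth flagging up front: the text above says nn.BCELoss, while the code further down uses nn.BCEWithLogitsLoss. The two agree only when BCELoss is fed sigmoid outputs, as this minimal sketch shows:

import torch
import torch.nn as nn

logits = torch.randn(4, 2)
targets = torch.rand(4, 2)

# BCEWithLogitsLoss applies the sigmoid internally (numerically more stable)
with_logits = nn.BCEWithLogitsLoss()(logits, targets)

# BCELoss expects probabilities, so the sigmoid must be applied explicitly
plain = nn.BCELoss()(torch.sigmoid(logits), targets)

assert torch.allclose(with_logits, plain)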

Sampler and DataLoader:

class MySampler(torch.utils.data.Sampler):
    """Yields shuffled start indices such that each sequence stays within one video."""
    def __init__(self, end_idx, seq_length):
        indices = []
        for i in range(len(end_idx) - 1):
            start = end_idx[i]
            # The last valid start still leaves room for a full sequence
            end = end_idx[i + 1] - seq_length
            indices.append(torch.arange(start, end))
        self.indices = torch.cat(indices)

    def __iter__(self):
        # Re-shuffle the valid start indices every epoch
        indices = self.indices[torch.randperm(len(self.indices))]
        return iter(indices.tolist())

    def __len__(self):
        return len(self.indices)
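
A quick toy check (numbers made up) that the sampler only yields start positions whose full window stays inside one video:

# Two toy "videos": frames 0-7 and 8-14; entries are cumulative frame counts
sampler_demo = MySampler(end_idx=[0, 8, 15], seq_length=6)
print(sorted(sampler_demo.indices.tolist()))   # -> [0, 1, 8]; every 6-frame window fits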

class MyDataset(Dataset):
    def __init__(self, image_paths, seq_length, length, batch):
        self.image_paths = image_paths
        self.seq_length = seq_length
        self.length = length
        self.batch = batch

    def __getitem__(self, index):
        # Load seq_length consecutive .npy frames starting at `index`
        start = index
        end = index + self.seq_length
        images = []
        for i in range(start, end):
            image_path = self.image_paths[i][0]
            images.append(torch.from_numpy(np.load(image_path)))

        x = torch.stack(images)
        # The target packs [batch size, class index]; only the second entry is the label
        y = torch.tensor([self.batch, self.image_paths[start][1]], dtype=torch.long)

        return x, y

    def __len__(self):
        return self.length

BATCH = 1

root_dir = 'path to folder...\\Dataset\\JAAD\\Pose\\dataset\\Train\\'
# is_dir is a method and must be called; a bare `d.is_dir` is always truthy
class_paths = [d.path for d in os.scandir(root_dir) if d.is_dir()]

class_image_paths = []
end_idx = []
for c, class_path in enumerate(class_paths):
    for d in os.scandir(class_path):
        if d.is_dir():
            paths = sorted(glob.glob(os.path.join(d.path, '*.npy')))
            # Attach the class index to every frame path
            paths = [(p, c) for p in paths]
            class_image_paths.extend(paths)
            end_idx.append(len(paths))

# Cumulative frame counts mark the video boundaries used by the sampler
end_idx = [0, *end_idx]
end_idx = torch.cumsum(torch.tensor(end_idx), 0)
seq_length = 6

sampler = MySampler(end_idx, seq_length)

dataset_train = MyDataset(image_paths=class_image_paths, seq_length=seq_length,
                          length=len(sampler), batch=BATCH)

loader_train = DataLoader(dataset_train, batch_size=BATCH, sampler=sampler)

# Count class frequencies to check for imbalance (0 = stand, 1 = walk)
no_stand = 0
no_walk = 0
lab_list = []
for data, target in tqdm(loader_train):
    lab = int(target[0][1])   # target = [batch, label]; take the label
    lab_list.append(lab)
    if lab == 0:
        no_stand += 1
    elif lab == 1:
        no_walk += 1
arr = np.array(lab_list)
uni = np.unique(arr)
print(uni)
print('stand: ' + str(no_stand) + '\t walk: ' + str(no_walk))
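
If these counts come back imbalanced, the loss can plateau simply because the majority class dominates every batch. One common remedy is the pos_weight argument of BCEWithLogitsLoss; a minimal sketch, assuming a single-logit binary head and placeholder counts (not the real JAAD numbers):

import torch
import torch.nn as nn

# Placeholder counts -- substitute the real no_stand / no_walk from the loop above
no_stand, no_walk = 3000, 7000

# Rule of thumb: pos_weight = (# negatives) / (# positives), with 1 = walk as "positive"
pos_weight = torch.tensor([no_stand / no_walk])
criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)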

Model architecture:

class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        # Modules are moved to the GPU later via model.cuda()
        self.lstm = nn.LSTM(input_size=31, hidden_size=20, num_layers=2,
                            dropout=0.5, batch_first=True)
        self.lstm2 = nn.LSTM(input_size=20, hidden_size=10, num_layers=2,
                             dropout=0.5, batch_first=True)
        self.fc = nn.Linear(60, 2)   # 6 time steps * 10 hidden units
        nn.init.xavier_uniform_(self.fc.weight)
        # batch_first=True so the attention sees the same (batch, seq, embed)
        # layout that the batch_first LSTMs produce
        self.self_attn = nn.MultiheadAttention(embed_dim=20, num_heads=2,
                                               batch_first=True)

    def forward(self, x):
        x = torch.nan_to_num(x, nan=-1)   # nan_to_num is not in-place; keep the result
        x = x.view(BATCH, 6, x.shape[2])
        x, hidden = self.lstm(x)
        x, weight = self.self_attn(query=x, key=x, value=x)
        x, hidden = self.lstm2(x)
        x = x.contiguous().view(x.shape[0], x.shape[1] * x.shape[2])
        x = self.fc(x)
        return x
train_on_gpu = torch.cuda.is_available()
model = Net()

if not train_on_gpu:
    print('CUDA is not available.  Training on CPU ...')
else:
    print('CUDA is available!  Training on GPU ...')
    
if train_on_gpu:
    model.cuda()
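
A quick shape check that the network maps one 6-frame pose sequence to two logits (dummy data, assuming 31 keypoint features per frame, matching the first LSTM's input_size):

model.eval()
with torch.no_grad():
    dummy = torch.randn(BATCH, 6, 31)   # (batch, seq_len, keypoint features)
    if train_on_gpu:
        dummy = dummy.cuda()
    print(model(dummy).shape)           # expected: torch.Size([1, 2])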

Optimizer and loss:

optimizer = optim.Adadelta(model.parameters(), lr=0.01, rho=0.9, eps=1e-06, weight_decay=0.01)
criterion = nn.BCEWithLogitsLoss()
# The scheduler is stepped on validation loss, where lower is better, hence mode='min'
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.7, patience=3, verbose=True)
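
For reference, these are the two conventional output/target pairings for a two-class problem (shapes and values here are purely illustrative). Note that MyDataset above packs [batch, label] into its target, so only the second entry is the class index:

import torch
import torch.nn as nn

logits = torch.randn(1, 2)    # model output: (batch, 2)
label = torch.tensor([1])     # class index: 0 = stand, 1 = walk

# Option A: two logits + integer class index -> CrossEntropyLoss
loss_a = nn.CrossEntropyLoss()(logits, label)

# Option B: one-hot float target -> BCEWithLogitsLoss
one_hot = nn.functional.one_hot(label, num_classes=2).float()
loss_b = nn.BCEWithLogitsLoss()(logits, one_hot)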

Training loop:

n_epochs = 50
NEW_INI = True
loss_values_train = []
loss_values_valid = []
if NEW_INI:
    valid_loss_min = np.inf
    NEW_INI = False
print(train_on_gpu)
for epoch in range(1, n_epochs + 1):
    train_loss = 0.0
    valid_loss = 0.0

    ###################
    # train the model #
    ###################
    model.train()
    for data, target in tqdm(loader_train):
        if train_on_gpu:
            data, target = data.cuda(), target.cuda()
        optimizer.zero_grad()
        output = model(data.float())
        loss = criterion(output, target.float())
        loss.backward()
        # Two clipping passes: element-wise value clip, then global norm clip
        torch.nn.utils.clip_grad_value_(model.parameters(), 1)
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
        optimizer.step()
        train_loss += loss.item() * data.size(0)

    ######################
    # validate the model #
    ######################
    model.eval()
    with torch.no_grad():   # no gradients needed during validation
        for data, target in loader_valid:
            if train_on_gpu:
                data, target = data.cuda(), target.cuda()
            output = model(data.float())
            loss = criterion(output, target.float())
            valid_loss += loss.item() * data.size(0)

    train_loss = train_loss / len(loader_train.sampler)
    valid_loss = valid_loss / len(loader_valid.sampler)

    loss_values_train.append(train_loss)
    loss_values_valid.append(valid_loss)

    print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f} \tlr: {:.6f}'.format(
        epoch, train_loss, valid_loss, optimizer.param_groups[0]['lr']))
    if valid_loss <= valid_loss_min:
        print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
            valid_loss_min, valid_loss))
        torch.save(model.state_dict(), 'model_pose-v' + str(VERSION) + '.pt')
        valid_loss_min = valid_loss

    scheduler.step(valid_loss)
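
For context, the accuracy figure below could be computed from the two logits roughly like this (a sketch; it assumes loader_valid yields the same [batch, label] targets and that the predicted class is the argmax over the logits):

correct = 0
total = 0
model.eval()
with torch.no_grad():
    for data, target in loader_valid:
        if train_on_gpu:
            data, target = data.cuda(), target.cuda()
        output = model(data.float())
        pred = output.argmax(dim=1)   # predicted class per sequence
        labels = target[:, 1]         # target = [batch, label]
        correct += (pred == labels).sum().item()
        total += labels.size(0)
print('accuracy: {:.1f}%'.format(100 * correct / total))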

The accuracy is 47%. Can you spot anything that might be causing this? Thanks :)

Tags: python, deep-learning, neural-network, classification, loss-function
