windows-10 - Windows 上的 PyTorch:DataLoader 的 num_workers 问题
问题描述
我刚买了一台运行 Windows 10 的新计算机,它带有 GPU,所以我想看看能否合理地把它用于机器学习。于是我尝试运行一个之前在 Google Colab 上训练过的旧模型。结果是它确实运行得很好,但我发现无法在 DataLoader 中使用多个 worker(工作进程)。搜索后发现,这是 Windows 上 PyTorch 在 Jupyter Notebook 中的一个已知问题,因此我改为在普通 Python 程序中运行。我发现这样确实可行,但创建数据迭代器(即调用 iter(dataloader))需要很长时间。以下是使用 1、2 和 6 个 worker 时的耗时(以秒为单位),每种配置各运行两次:
我注意到使用 2 个 worker 似乎是最快的,而且各次运行之间的耗时波动很大,这让我感到惊讶,因为这台机器当时并没有在做其他事情。
所以第一个问题是:
- 有没有办法让 PyTorch 选择最有效的工人数量来使用?
第二个问题是:
- 如果我安装一个 Linux 发行版,是否就能在 Jupyter Notebook 中使用多个 worker?在理想情况下,这是我更愿意采用的方式。
我运行的代码如下,相关部分位于 if __name__ == "__main__": 之后:
# -*- coding: utf-8 -*-
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset,DataLoader
import os
import numpy as np
#import gym
import pickle
import matplotlib.pyplot as plt
import time
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions plus a shortcut branch.

    The main branch is conv -> batch-norm -> ReLU -> conv -> batch-norm.
    The shortcut branch is an empty ``nn.Sequential`` (identity) unless the
    stride or the channel count changes, in which case it becomes a 1x1
    convolution followed by batch-norm so both branches have matching shapes.
    """

    # Multiplier applied to ``planes`` to obtain the block's output channels.
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        """Build the block.

        Args:
            in_planes: number of input channels.
            planes: number of channels produced by both 3x3 convolutions.
            stride: stride of the first convolution (and of the projection
                shortcut when one is needed).
        """
        super().__init__()
        out_planes = self.expansion * planes

        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        # A projection is only required when the residual branch changes the
        # spatial size (stride) or the number of channels.
        if stride != 1 or in_planes != out_planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        hidden = F.relu(self.bn1(self.conv1(x)))
        hidden = self.bn2(self.conv2(hidden))
        hidden += self.shortcut(x)
        return F.relu(hidden)
# create dataset
class C4Dataset(Dataset):
    """Connect-4 positions stored as a pickled list of ``(key, val)`` pairs.

    Each ``key`` is an integer holding a 7x7 bit board (7 bits per column).
    ``getPosMask`` splits it into three 7x7 float planes plus an integer
    mask; ``__getitem__`` packs those planes, together with a plane produced
    by ``possible``, into a ``(5, 7, 7)`` float32 array.  The label returned
    with it is ``int(val) + 18``.

    NOTE(review): the original docstring said ``val`` is the value for the
    player to move and must be negated when the move count is odd.  The code
    does not do that; confirm whether the stored values are already adjusted.
    """

    # Default location of the pickled data; can be overridden per instance
    # through the ``fileName`` argument of ``__init__``.
    fileName = r'C:\Users\alan\Desktop\Python\Python36\connect4\Layers\ListAllKeyVal19'
    # Bit 0 of each of the seven 7-bit columns (1 + 2**7 + ... + 2**42).
    bottom_mask = 4432676798593
    # Bits 0..5 of every column.
    board_mask = bottom_mask * ((1 << 6) - 1)
    # Bit weights used to fold a flattened 0/1 board back into an integer.
    bNos = 2**np.arange(49, dtype=np.uint64)

    @staticmethod
    def getData(fileName):
        """Load and return the pickled object stored at ``fileName``.

        SECURITY: ``pickle.load`` can execute arbitrary code; only use this
        with files you created yourself.
        """
        with open(fileName, 'rb') as inFile:
            return pickle.load(inFile)

    @staticmethod
    def oneHot(x):
        """Return row ``x`` of a 37x37 float32 identity matrix."""
        return np.eye(37, dtype=np.float32)[x]

    @staticmethod
    def getNoMoves(ones, twos):
        """Return the sum of all entries of ``ones + twos``."""
        return np.sum(ones + twos)

    @staticmethod
    def _bitsToBoard(value):
        """Decode the low 49 bits of ``value`` into a 7x7 array of 0/1.

        Bit ``7 * col + k`` ends up at ``[6 - k, col]``: columns are filled
        in Fortran order and the result is flipped vertically, so bit 0 of
        each column is the last row.
        """
        bits = '{:049b}'.format(value)[::-1]
        flat = np.frombuffer(bits.encode(encoding='utf-8', errors='strict'), 'u1') - ord('0')
        return np.flipud(np.reshape(flat, (7, 7), order='F'))

    @staticmethod
    def getPosMask(key):
        """Split ``key`` into board planes and an integer mask.

        For every column the first set bit (searching from row 0) is treated
        as a marker: it is cleared, and every cell after it in that column is
        set in the mask.

        Returns:
            A tuple ``(pos, pos ^ mask, not mask, maskNo)`` where the first
            three items are 7x7 float32 arrays and ``maskNo`` is the mask
            re-encoded as a Python int using the same bit layout as ``key``.

        Raises:
            IndexError: if some column of ``key`` contains no set bit.
        """
        arr = C4Dataset._bitsToBoard(key)
        pos = arr.copy()
        mask = arr.copy()
        for col in range(7):
            topPos = np.where(arr[:, col] == 1)[0][0]
            # The marker bit itself is not a stone.
            pos[topPos, col] = 0
            mask[topPos, col] = 0
            if topPos < 6:
                mask[topPos + 1:, col] = 1
        # Undo the flip/reshape so the bit order matches the key encoding.
        msk = np.reshape(np.flipud(mask), (49), order='F')
        maskNo = np.array(msk.dot(C4Dataset.bNos), dtype=np.uint64).item()
        return (pos.astype('float32'),
                (pos ^ mask).astype('float32'),
                (np.logical_not(mask)).astype('float32'),
                maskNo)

    @staticmethod
    def possible(mask):
        """Return a 7x7 0/1 array decoded from
        ``(mask + bottom_mask) & board_mask``."""
        poss = (mask + C4Dataset.bottom_mask) & C4Dataset.board_mask
        return C4Dataset._bitsToBoard(poss)

    def __init__(self, fileName=None):
        """Load the ``(key, val)`` list.

        Args:
            fileName: path of the pickle file; defaults to the class
                attribute ``fileName`` when omitted.
        """
        path = self.fileName if fileName is None else fileName
        self.lst = C4Dataset.getData(path)

    def __len__(self):
        """Return the number of stored ``(key, val)`` pairs."""
        return len(self.lst)

    def __getitem__(self, idx):
        """Return ``(planes, label)`` for sample ``idx``.

        ``planes`` is a ``(5, 7, 7)`` float32 array.  Channels 0-2 hold rows
        1..6 of the three arrays from ``getPosMask``; channel 3 (even move
        count) or 4 (odd move count) holds rows 1..6 of ``possible(mask)``.
        The last row of every channel stays zero.  ``label`` is
        ``int(val) + 18``.
        """
        key, val = self.lst[idx]
        val = int(val)
        ones, twos, zeros, mask = C4Dataset.getPosMask(key)
        arr = np.zeros((5, 7, 7), dtype=np.float32)
        # Row 0 of each decoded plane is dropped; the rest fills rows 0..5.
        arr[0, :6, :7] = ones[1:, :]
        arr[1, :6, :7] = twos[1:, :]
        arr[2, :6, :7] = zeros[1:, :]
        moves = int(C4Dataset.getNoMoves(ones, twos))
        p = (moves % 2) + 3
        arr[p, :6, :7] = C4Dataset.possible(mask)[1:, :]
        return arr, val + 18  # C4Dataset.oneHot(val+18)
class C4Net(nn.Module):
    """Convolutional classifier: a stem convolution, eight ``BasicBlock``
    layers with 32 channels each, and a final linear layer.

    The linear layer expects ``32 * 7 * 7`` features, i.e. a 7x7 spatial
    input, since every convolution here keeps the spatial size.
    """

    def __init__(self, inFilters, outFilters):
        """Build the network.

        Args:
            inFilters: number of input channels.
            outFilters: number of output units of the final linear layer.
        """
        super().__init__()
        self.conv1 = nn.Conv2d(inFilters, 32, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        # Registered as layer1 .. layer8 so state_dict keys stay the same.
        for index in range(1, 9):
            setattr(self, 'layer%d' % index, BasicBlock(32, 32))
        self.linear = nn.Linear(32 * 7 * 7, outFilters)  # 1568 features

    def forward(self, x):
        """Return the ``outFilters`` raw scores for each item of batch ``x``."""
        features = F.relu(self.bn1(self.conv1(x)))
        for index in range(1, 9):
            features = getattr(self, 'layer%d' % index)(features)
        # Flatten everything except the batch dimension.
        flat = features.view(features.size(0), -1)
        return self.linear(flat)
# show some images
def show(img):
    """Draw the first three channels of tensor ``img`` with matplotlib.

    The channels-first data is transposed to channels-last before being
    handed to ``plt.imshow``; nearest-neighbour interpolation is used.
    """
    channels_first = img.numpy()[:3, :, :]
    channels_last = np.transpose(channels_first, (1, 2, 0))
    plt.imshow(channels_last, interpolation='nearest')
# get some random training images
if __name__ == "__main__":
    # Script entry point: times DataLoader start-up, shows one batch, then
    # fine-tunes C4Net from saved weights and writes the new weights to disk.
    # Everything stays under this guard because DataLoader worker processes
    # started with the "spawn" method (the Windows default) re-import this
    # module; see the PyTorch Windows FAQ.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # Assuming that we are on a CUDA machine, this should print a CUDA device:
    print(device)

    # Hyper-parameters
    max_epochs = 1
    batchSize = 1024  # 512 and 256 were tried previously
    learningRate = .00003

    # DataLoader parameters
    params = {'batch_size': batchSize, 'shuffle': True, 'num_workers': 2}

    # Build the dataset and time each DataLoader start-up step separately.
    dataset = C4Dataset()
    start = time.time()
    dataloader = DataLoader(dataset, **params)
    middle = time.time()
    print('create dataloader', middle-start)
    # With num_workers > 0 this is where the worker processes are started.
    dataiter = iter(dataloader)
    end = time.time()
    print('create data iterator', end-middle)
    # Use the builtin next(); DataLoader iterators have no .next() method
    # in current PyTorch releases.
    images, labels = next(dataiter)
    final = time.time()
    print('get one batch', final-end)

    # show images
    show(torchvision.utils.make_grid(images[:16]))

    # Per-class loss weights: raw values scaled so the largest becomes 1.
    wts = np.array([59, 963, 12406, 148920, 62551, 47281, 55136, 54312, 44465, 31688,
                    27912, 37907, 114778, 242800, 394530, 495237, 582174, 163370, 480850,
                    201152, 690905, 633937, 721340, 372479, 193375, 84648, 76576, 91087, 130428,
                    154184, 157339, 156453, 227696, 1705325, 548155, 44315, 2082], dtype=np.float32)
    maxwt = wts.max()
    weights = torch.from_numpy(wts / maxwt)
    # Tensor.to() returns a new tensor instead of moving in place, so the
    # transfer to `device` happens where the weights are handed to the loss.

    # create the network and restore the previously saved weights
    net = C4Net(5, 37)
    net.to(device)
    PATH = r'C:\Users\alan\Desktop\Python\connectX\c4Net37Weights00003.pth'
    net.load_state_dict(torch.load(PATH, map_location=device))

    # create the loss function and optimiser
    criterion = nn.CrossEntropyLoss(weight=weights.to(device))
    optimizer = optim.Adam(net.parameters(), lr=learningRate)

    # train the network
    start = time.time()
    for epoch in range(max_epochs):  # loop over the dataset multiple times
        running_loss = 0.0
        for i, data in enumerate(dataloader, 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data[0].to(device), data[1].to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if i % 2000 == 1999:  # print every 2000 mini-batches
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 2000))
                running_loss = 0.0
        # Checkpoint once per epoch.
        # NOTE(review): the source lost its indentation, so the original
        # nesting level of this save is an assumption - confirm.
        torch.save(net.state_dict(), r'C:\Users\alan\Desktop\Python\connectX\tempWeights')
    print('Finished Training')

    # save the weights
    PATH = r'C:\Users\alan\Desktop\Python\connectX\c4Net37Weights00004.pth'
    torch.save(net.state_dict(), PATH)
    end = time.time()
    print('elapsed time', end-start)
PS 该机器是戴尔 XPS 17,配备 8 核 Intel Core i9-10885H,GPU 是配备 Max-Q 的 NVIDIA GeForce RTX 2060。在这项测试中,它的运行速度比在 Google Colab 上快 4 倍,但我不知道分配给我的是什么 GPU。
解决方案
推荐阅读
- google-apps-script - 将数据从工作表复制到文档并生成新文档
- javascript - HTMLMediaElement 上不存在 setSinkId
- ubuntu - gunicorn 无法在 EC2 Ubuntu 上启动 Flask 项目
- gcc - gcc 交叉编译 -mips1 不生成 MIPS-I
- mysql - 无法在 WSL 中成功安装 mysql-server
- javascript - null 不是对象(正在评估 'document.getElementById('logout-form').submit') - 为什么会出现此错误?
- powershell - 如何使用 Powershell 递归多次移动文件?
- apache-nifi - NiFi - 提高 QueryDatabaseTable 性能以超越 Sqoop
- c - 使用 c 图形的赛车游戏
- java - Firebase 实时数据库非常高的下载使用率