https://discuss.pytorch.org/t/cpu-faster-than-gpu/25343/12
/>
So now, I am sending my trainingdata.data and .targets to cuda before starting training. What I am confused about it how to then use the Dataloader made off of the trainingdata in my train function, as when I try the way I had before (when I was sending individual batches to cuda), I get this error on the iterator of the dataloader:
TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.
It is erroring on the following line (loader is the dataloader here):
for data, target in loader:
What am I doing wrong? How can I still use the dataloader while also sending the full dataset over to the gpu?
Python version is 3.12, pytorch is 2.3.0+cu121
Also, the error is the same if I dont include the pin_memory var to the dataloader
Full error trace:
Traceback (most recent call last):
File "C:\Program Files\JetBrains\PyCharm Community Edition 2024.1.2\plugins\python-ce\helpers\pydev\pydevd.py", line 1537, in _exec
pydev_imports.execfile(file, globals, locals) # execute the script
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Program Files\JetBrains\PyCharm Community Edition 2024.1.2\plugins\python-ce\helpers\pydev\_pydev_imps\_pydev_execfile.py", line 18, in execfile
exec(compile(contents+"\n", file, 'exec'), glob, loc)
File "C:\Users\me\PycharmProjects\NueralNetTests\TorchTests.py", line 181, in
main()
File "C:\Users\me\PycharmProjects\NueralNetTests\TorchTests.py", line 169, in main
train(epoch, model, loaders, device, optimizer, lossFN)
File "C:\Users\me\PycharmProjects\NueralNetTests\TorchTests.py", line 41, in train
for data, target in loaders['train']:
File "C:\Users\me\PycharmProjects\NueralNetTests\venv\Lib\site-packages\torch\utils\data\dataloader.py", line 631, in __next__
data = self._next_data()
^^^^^^^^^^^^^^^^^
File "C:\Users\me\PycharmProjects\NueralNetTests\venv\Lib\site-packages\torch\utils\data\dataloader.py", line 675, in _next_data
data = self._dataset_fetcher.fetch(index) # may raise StopIteration
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\me\PycharmProjects\NueralNetTests\venv\Lib\site-packages\torch\utils\data\_utils\fetch.py", line 51, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
~~~~~~~~~~~~^^^^^
File "C:\Users\me\PycharmProjects\NueralNetTests\venv\Lib\site-packages\torchvision\datasets\mnist.py", line 143, in __getitem__
img = Image.fromarray(img.numpy(), mode="L")
^^^^^^^^^^^
TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.
and The code:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
_transform = Compose([
lambda img: rotate(img, -90),
lambda img: hflip(img),
ToTensor()
])
trainData = datasets.EMNIST(
root='data',
train=True,
transform=_transform,
download=True,
split='letters'
)
testData = datasets.EMNIST(
root='data',
train=False,
transform=_transform,
download=True,
split='letters'
)
trainLoader = DataLoader(trainData,
batch_size=100,
shuffle=True,
pin_memory=True
)
testLoader = DataLoader(testData,
batch_size=100,
shuffle=True,
pin_memory=True
)
trainData.data = trainData.data.to(device)
trainData.targets = trainData.targets.to(device)
testData.data = testData.data.to(device)
testData.targets = testData.targets.to(device)
model = CNN().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
lossFN = nn.CrossEntropyLoss()
for epoch in range(1, 2):
train(model, trainLoader, device, optimizer, lossFN)
test(model, testLoader, device, lossFN)
def train(model, loader, device, optimizer, lossFN):
model.train()
for data, target in loader:
optimizer.zero_grad()
output = model(data)
loss = lossFN(output, target)
loss.backward()
optimizer.step()
class CNN(nn.Module):
def __init__(self):
super(CNN, self).__init__()
self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
self.conv2Drop = nn.Dropout2d()
self.fc1 = nn.Linear(320, 50)
self.fc2 = nn.Linear(50, 27)
def forward(self, x):
x = F.leaky_relu(F.max_pool2d(self.conv1(x), 2))
x = F.leaky_relu(F.max_pool2d(self.conv2Drop(self.conv2(x)), 2))
x = x.view(-1, 320)
x = F.leaky_relu(self.fc1(x))
x = F.dropout(x, training=self.training)
x = self.fc2(x)
return F.softmax(x)
0 comments:
Post a Comment
Thanks