"RuntimeError: expected scalar type Long but found Float" in the forward pass

#python #machine-learning #neural-network #pytorch


Question:

I am trying to build a model with PyTorch, but during the forward pass I run into this error: "RuntimeError: expected scalar type Long but found Float".

Source code:

  • dataset_loader.py:
 import h5py
import torch
import numpy as np

from PIL import Image
from os import listdir
from sklearn.utils import shuffle

WIDTH = 64
HEIGHT = 64
CREATE_DATASET = False


def load_set(path: str):
    dataset = []
    for f in listdir(path):
        dataset.append(np.asarray(Image.open(path + f).resize((WIDTH, HEIGHT)).convert('RGB'), dtype=np.int32).reshape(3, WIDTH, HEIGHT))

    return np.array(dataset, dtype=np.int32)


def create_batch(dataset, labels, batch_size):
    dataset_result = []
    labels_result = []
    data_batch = []
    label_batch = []

    for index in range(len(dataset)):
        if len(data_batch) == batch_size:
            # dataset_result.append(np.array(data_batch, dtype=np.int32))
            # labels_result.append(np.array(label_batch, dtype=np.int32))
            dataset_result.append(data_batch)
            labels_result.append(label_batch)
            data_batch = []
            label_batch = []
        else:
            # data_batch.append(torch.tensor(dataset[index]))
            # label_batch.append(torch.tensor(labels[index]))
            data_batch.append(np.array(dataset[index], dtype=np.int32))
            label_batch.append(np.array(labels[index], dtype=np.int32))

    dataset_result = np.array(dataset_result, dtype=np.int32)
    labels_result = np.array(labels_result, dtype=np.int32)
    return torch.from_numpy(dataset_result), torch.from_numpy(labels_result)
    # return dataset_result, labels_result


def create_dataset():
    dataset_file = h5py.File("dataset.hdf5", "w")

    train_normal = load_set("chest_xray/train/NORMAL/")
    dataset_file.create_dataset("train_normal", train_normal.shape, dtype=np.int32, data=train_normal)

    train_pneumonia = load_set("chest_xray/train/PNEUMONIA/")
    dataset_file.create_dataset("train_pneumonia", train_pneumonia.shape, dtype=np.int32, data=train_pneumonia)

    test_normal = load_set("chest_xray/test/NORMAL/")
    dataset_file.create_dataset("test_normal", test_normal.shape, dtype=np.int32, data=test_normal)

    test_pneumonia = load_set("chest_xray/test/PNEUMONIA/")
    dataset_file.create_dataset("test_pneumonia", test_pneumonia.shape, dtype=np.int32, data=test_pneumonia)

    val_normal = load_set("chest_xray/val/NORMAL/")
    dataset_file.create_dataset("val_normal", val_normal.shape, dtype=np.int32, data=val_normal)

    val_pneumonia = load_set("chest_xray/val/PNEUMONIA/")
    dataset_file.create_dataset("val_pneumonia", val_pneumonia.shape, dtype=np.int32, data=val_pneumonia)


def load_dataset():
    dataset = h5py.File('dataset.hdf5', 'r')

    train_set = np.array(list(dataset["train_normal"]) + list(dataset["train_pneumonia"]), dtype=np.int32)
    test_set = np.array(list(dataset["test_normal"]) + list(dataset["test_pneumonia"]), dtype=np.int32)
    val_set = np.array(list(dataset["val_normal"]) + list(dataset["val_pneumonia"]), dtype=np.int32)

    train_labels = [0] * len(dataset["train_normal"]) + [1] * len(dataset["train_pneumonia"])
    test_labels = [0] * len(dataset["test_normal"]) + [1] * len(dataset["test_pneumonia"])
    val_labels = [0] * len(dataset["val_normal"]) + [1] * len(dataset["val_pneumonia"])

    BATCH_SIZE = 32

    train_set, train_labels = shuffle(np.array(train_set, dtype=np.int32), np.array(train_labels, dtype=np.int32))
    train_set, train_labels = create_batch(train_set, train_labels, BATCH_SIZE)

    test_set, test_labels = shuffle(np.array(test_set, dtype=np.int32), np.array(test_labels, dtype=np.int32))
    test_set, test_labels = create_batch(test_set, test_labels, BATCH_SIZE)

    val_set, val_labels = shuffle(np.array(val_set, dtype=np.int32), np.array(val_labels, dtype=np.int32))
    val_set, val_labels = create_batch(val_set, val_labels, BATCH_SIZE)

    return train_set, train_labels, test_set, test_labels, val_set, val_labels, BATCH_SIZE
  
  • network.py:
 import torch.nn as nn
import torch.nn.functional as F

class Network(nn.Module):
  def __init__(self):
    super().__init__()
    self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
    self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)

    self.fc1 = nn.Linear(in_features=12*5*5, out_features=120)
    self.fc2 = nn.Linear(in_features=120, out_features=60)
    self.out = nn.Linear(in_features=60, out_features=2)

  def forward(self, t):
    t = self.conv1(t)
    t = F.relu()
    t = F.max_pool2d(t, kernel_size=2, stride=2)

    t = F.relu(self.conv2(t))
    t = F.max_pool2d(t, kernel_size=2, stride=2)

    t = t.reshape(-1, 12 * 4 * 4)
    t = F.relu(self.fc1(t))
    t = F.relu(self.fc2(t))
    t = self.out(t)

    return t
  
  • main.py:
 import numpy as np
import torch
import torch.optim as optim
import torch.nn.functional as F

import network
import dataset_loader

import matplotlib.pyplot as plt

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

WIDTH = 64
HEIGHT = 64
NEED_TO_CREATE_DATASET = False

if NEED_TO_CREATE_DATASET:
    dataset_loader.create_dataset()

train_set, train_labels, test_set, test_labels, val_set, val_labels, BATCH_SIZE = dataset_loader.load_dataset()

TRAINING_SIZE = len(train_set) * BATCH_SIZE
TESTING_SIZE = len(test_set) * BATCH_SIZE

EPOCHS = 5
LEARNING_RATE = 0.01

network = network.Network().to(device)
optimizer = optim.Adam(network.parameters(), lr=LEARNING_RATE)

training_losses = []
training_accuracies = []

testing_losses = []
testing_accuracies = []

def get_num_correct(preds, labels):
    return preds.argmax(dim=1).eq(labels).sum().item()

def train():
    network.train()
    correct_in_episode = 0
    episode_loss = 0

    for index, images in enumerate(train_set):
        labels = train_labels[index]

        print(images.shape)
        # exit()
        predictions = network(images.type(torch.LongTensor)) # TODO: fix crash "RuntimeError: expected scalar type Long but found Float"
        # predictions = network(images)
        loss = F.cross_entropy(predictions, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        episode_loss += loss.item()
        correct_in_episode += get_num_correct(predictions, labels)

    training_losses.append(episode_loss)
    training_accuracies.append(correct_in_episode * 100 / TRAINING_SIZE)
    print(f"Epoch: {epoch   1} accuracy: {correct_in_episode * 100 / TRAINING_SIZE:.2f} loss: {episode_loss:.3f}", end="t")


def test():
    network.eval()
    episode_loss = 0
    correct_in_episode = 0

    with torch.no_grad():
        for index, images in enumerate(test_set):
            labels = test_labels[index]

            predictions = network(images)
            loss = F.cross_entropy(predictions, labels)

            episode_loss = loss.item()
            correct_in_episode += get_num_correct(predictions, labels)

    testing_losses.append(episode_loss)
    testing_accuracies.append(correct_in_episode * 100 / TESTING_SIZE)
    print(f'Validation: Accuracy: {correct_in_episode * 100 / TESTING_SIZE:.2f} loss: {episode_loss:.3f}')

for epoch in range(EPOCHS):
    train()
    test()

fig = plt.figure()

plt.plot(list(range(1, len(training_losses) + 1)), training_losses, color='blue')
plt.plot(list(range(1, len(testing_losses) + 1)), testing_losses, color='red')

plt.legend(['Train Loss', 'Test Loss'], loc='upper right')
plt.xlabel('number of training examples seen')
plt.ylabel('Loss')

fig = plt.figure()

plt.plot(list(range(1, len(training_accuracies) + 1)), training_accuracies, color='blue')
plt.plot(list(range(1, len(testing_accuracies) + 1)), testing_accuracies, color='red')

plt.legend(['Train Accuracy', 'Test Accuracy'], loc='upper right')
plt.xlabel('number of training examples seen')
plt.ylabel('Accuracy')
  

Error:

 torch.Size([32, 3, 64, 64])
Traceback (most recent call last):
  File "Soluce.py", line 86, in <module>
    train()
  File "Soluce.py", line 50, in train
    predictions = network(images.type(torch.LongTensor)) # TODO: fix crash "RuntimeError: expected scalar type Long but found Float"
  File "/home/thytu/.local/lib/python3.8/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/home/thytu/Prog/PoC/pool_2021/Day3/Admin-XRAI/Admin/network.py", line 15, in forward
    t = self.conv1(t)
  File "/home/thytu/.local/lib/python3.8/site-packages/torch/nn/modules/module.py", line 727, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/home/thytu/.local/lib/python3.8/site-packages/torch/nn/modules/conv.py", line 423, in forward
    return self._conv_forward(input, self.weight)
  File "/home/thytu/.local/lib/python3.8/site-packages/torch/nn/modules/conv.py", line 419, in _conv_forward
    return F.conv2d(input, weight, self.bias, self.stride,
RuntimeError: expected scalar type Long but found Float
  

Do you have any idea what the problem is?
Thanks for your time and your help.

(PS: it is not about the cross-entropy, it happens before that)

Comments:

1. This line: t = F.relu(). You forgot to pass t to the relu activation.

2. Thanks, but that is not the problem. I will post the answer soon.

Answer #1:

I ran into the same problem. My code looks like this:

  • my code:

     import torch
    from torchvision.models import alexnet
    
    
    model = alexnet(pretrained=True)
    
    input_data = torch.randint(255, size=(1, 3, 224, 224), dtype=torch.long)
    outputs = model(input_data)
    print(outputs)
      

a very simple operation, but it raises this error:

 Traceback (most recent call last):
  File "D:Program FilesJetBrainsPyCharm 2021.1.3pluginspythonhelperspydevpydevd.py", line 1483, in _exec
    pydev_imports.execfile(file, globals, locals)  # execute the script
  File "D:Program FilesJetBrainsPyCharm 2021.1.3pluginspythonhelperspydev_pydev_imps_pydev_execfile.py", line 18, in execfile
    exec(compile(contents "n", file, 'exec'), glob, loc)
  File "F:/Python/TF2/main.py", line 9, in <module>
    outputs = model(input_data)
  File "D:Anacondaenvstf2xlibsite-packagestorchnnmodulesmodule.py", line 1051, in _call_impl
    return forward_call(*input, **kwargs)
  File "D:Anacondaenvstf2xlibsite-packagestorchvisionmodelsalexnet.py", line 46, in forward
    x = self.features(x)
  File "D:Anacondaenvstf2xlibsite-packagestorchnnmodulesmodule.py", line 1051, in _call_impl
    return forward_call(*input, **kwargs)
  File "D:Anacondaenvstf2xlibsite-packagestorchnnmodulescontainer.py", line 139, in forward
    input = module(input)
  File "D:Anacondaenvstf2xlibsite-packagestorchnnmodulesmodule.py", line 1051, in _call_impl
    return forward_call(*input, **kwargs)
  File "D:Anacondaenvstf2xlibsite-packagestorchnnmodulesconv.py", line 443, in forward
    return self._conv_forward(input, self.weight, self.bias)
  File "D:Anacondaenvstf2xlibsite-packagestorchnnmodulesconv.py", line 440, in _conv_forward
    self.padding, self.dilation, self.groups)
RuntimeError: expected scalar type Long but found Float
  

At first I thought it was a problem with the weights, but I dropped that idea right away: since this is just the official model, the problem has to be in the input data. So I changed the code again:

  • Modified code:

     input_data = torch.rand(size=(1, 3, 224, 224))
    outputs = model(input_data)
    print(outputs)
      

and then everything works:

 tensor([[-1.5024e+00, -1.1394e+00, -3.7661e-01,  1.1497e+00,  1.1878e-01,
     -6.6696e-01,  3.8399e-01, -1.0095e+00, -1.3813e+00, -1.4772e+00,
     ...
Process finished with exit code 0
  

So my suggestion is to try changing the type of the input data.
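
For the code in the question the same fix applies, just in the other direction: the nn.Conv2d weights are float32, so the images passed to network must be float as well, while F.cross_entropy expects its target labels (not the images) to be of an integer type such as long. Below is a minimal sketch of how the loop in train() could be adjusted, reusing only the names already defined in main.py:

    # Sketch only: assumes train_set / train_labels are the integer tensors
    # produced by dataset_loader.create_batch(), as in the question.
    for index, images in enumerate(train_set):
        labels = train_labels[index]

        # Convolutions need floating-point input; casting the images to
        # LongTensor is what makes conv2d raise
        # "RuntimeError: expected scalar type Long but found Float".
        images = images.float().to(device)

        # cross_entropy wants integer class indices as its targets instead.
        labels = labels.long().to(device)

        predictions = network(images)
        loss = F.cross_entropy(predictions, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

Scaling the pixel values as well (for example dividing the float images by 255) is common practice, but it is not what triggers this particular error.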