RuntimeError: Expected tensor for argument #1 'indices' to have scalar type Long

#python #pytorch #bert-language-model

Question:

I am trying to work with BERT, but I keep getting RuntimeError: Expected tensor for argument #1 'indices' to have scalar type Long; but got torch.IntTensor instead (while checking arguments for embedding). I don't quite understand what I am doing wrong at this point. This is the traceback I get:

 ---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-89-dcab947253f7> in <module>
     27 
     28     # Forward pass
---> 29     loss = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask, labels=b_labels)
     30     train_loss_set.append(loss.item())
     31     # Backward pass

~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

~\Anaconda3\lib\site-packages\pytorch_pretrained_bert\modeling.py in forward(self, input_ids, token_type_ids, attention_mask, labels)
    987 
    988     def forward(self, input_ids, token_type_ids=None, attention_mask=None, labels=None):
--> 989         _, pooled_output = self.bert(input_ids, token_type_ids, attention_mask, output_all_encoded_layers=False)
    990         pooled_output = self.dropout(pooled_output)
    991         logits = self.classifier(pooled_output)

~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

~\Anaconda3\lib\site-packages\pytorch_pretrained_bert\modeling.py in forward(self, input_ids, token_type_ids, attention_mask, output_all_encoded_layers)
    728         extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0
    729 
--> 730         embedding_output = self.embeddings(input_ids, token_type_ids)
    731         encoded_layers = self.encoder(embedding_output,
    732                                       extended_attention_mask,

~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

~\Anaconda3\lib\site-packages\pytorch_pretrained_bert\modeling.py in forward(self, input_ids, token_type_ids)
    265             token_type_ids = torch.zeros_like(input_ids)
    266 
--> 267         words_embeddings = self.word_embeddings(input_ids)
    268         position_embeddings = self.position_embeddings(position_ids)
    269         token_type_embeddings = self.token_type_embeddings(token_type_ids)

~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
    725             result = self._slow_forward(*input, **kwargs)
    726         else:
--> 727             result = self.forward(*input, **kwargs)
    728         for hook in itertools.chain(
    729                 _global_forward_hooks.values(),

~\Anaconda3\lib\site-packages\torch\nn\modules\sparse.py in forward(self, input)
    124         return F.embedding(
    125             input, self.weight, self.padding_idx, self.max_norm,
--> 126             self.norm_type, self.scale_grad_by_freq, self.sparse)
    127 
    128     def extra_repr(self) -> str:

~\Anaconda3\lib\site-packages\torch\nn\functional.py in embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse)
   1850         # remove once script supports set_grad_enabled
   1851         _no_grad_embedding_renorm_(weight, input, max_norm, norm_type)
-> 1852     return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
   1853 
   1854 

RuntimeError: Expected tensor for argument #1 'indices' to have scalar type Long; but got torch.IntTensor instead (while checking arguments for embedding)
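
For reference, the failing call is the word-embedding lookup inside BERT, and in this PyTorch version torch.nn.Embedding appears to accept only int64 (Long) indices. A minimal sketch that reproduces the same error outside of BERT (hypothetical vocabulary size and token ids, not my data):

import torch
import torch.nn as nn

emb = nn.Embedding(num_embeddings=30522, embedding_dim=768)  # BERT-sized embedding table, for illustration

ids_int32 = torch.tensor([[101, 2054, 102]], dtype=torch.int32)
# emb(ids_int32)              # raises the same "Expected tensor ... to have scalar type Long" RuntimeError

ids_int64 = ids_int32.long()  # same as .to(torch.int64)
print(emb(ids_int64).shape)   # torch.Size([1, 3, 768])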
 

The code I am using is the following:

 train_loss_set = []

# Number of training epochs (authors recommend between 2 and 4)
epochs = 10

# trange is a tqdm wrapper around the normal python range
for _ in trange(epochs, desc="Epoch"):
  
  
  # Training
  
  # Set our model to training mode (as opposed to evaluation mode)
  model.train()
  
  # Tracking variables
  tr_loss = 0
  nb_tr_examples, nb_tr_steps = 0, 0

  # Train the data for one epoch
  for step, batch in enumerate(train_dataloader):
    # Add batch to CPU
    batch = tuple(t.to(device) for t in batch)
    # Unpack the inputs from our dataloader
    b_input_ids, b_input_mask, b_labels = batch
    # Clear out the gradients (by default they accumulate)
    optimizer.zero_grad()
   
    # Forward pass
    loss = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask, labels=b_labels)
    train_loss_set.append(loss.item())    
    # Backward pass
    loss.backward()
    # Update parameters and take a step using the computed gradient
    optimizer.step()
    
    
    # Update tracking variables
    tr_loss += loss.item()
    nb_tr_examples += b_input_ids.size(0)
    nb_tr_steps += 1

print("Train loss: {}".format(tr_loss/nb_tr_steps))
    
    
  # Validation

  # Put model in evaluation mode to evaluate loss on the validation set
  model.eval()

  # Tracking variables 
  eval_loss, eval_accuracy = 0, 0
  nb_eval_steps, nb_eval_examples = 0, 0

  # Evaluate data for one epoch
  for batch in validation_dataloader:
    # Add batch to cpu
    batch = tuple(t.to(device) for t in batch)
    # Unpack the inputs from our dataloader
    b_input_ids, b_input_mask, b_labels = batch
    # Telling the model not to compute or store gradients, saving memory and speeding up validation
    with torch.no_grad():
      # Forward pass, calculate logit predictions
        logits = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)
    
    # Move logits and labels to CPU
    logits = logits.detach().cpu().numpy()
    label_ids = b_labels.to('cpu').numpy()

    tmp_eval_accuracy = flat_accuracy(logits, label_ids)
    
    eval_accuracy += tmp_eval_accuracy
    nb_eval_steps += 1

print("Validation Accuracy: {}".format(eval_accuracy/nb_eval_steps))