BERT text classification example training error

#python #keras

Question:

When I run this example, it gives me an error saying:

tensorflow.python.framework.errors_impl.InvalidArgumentError: Invalid reduction dimension (1 for input with 1 dimension(s) [[node Adam/clip_by_norm_196/Sum (defined at /miniforge3/envs/py38/lib/python3.8/site-packages/transformers/modeling_tf_utils.py:799) ]] [Op:__inference_train_function_19438]

Function call stack: train_function

I can't figure out what the problem is. Does anyone understand it?

```
from transformers import BertTokenizer, TFBertForSequenceClassification
import tensorflow as tf
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np

tokenizer = BertTokenizer.from_pretrained('bert-base-chinese')

max_length = 25
batch_size = 8

def split_dataset(df):
    train_set, x = train_test_split(df,
                                    stratify=df['label'],
                                    test_size=0.1,
                                    random_state=42)
    val_set, test_set = train_test_split(x,
                                         stratify=x['label'],
                                         test_size=0.5,
                                         random_state=43)
    return train_set, val_set, test_set

df_raw = pd.read_csv("data.txt", sep="\t", header=None, names=["text", "label"])
# label
df_label = pd.DataFrame({"label": ["财经","房产","股票","教育","科技","社会","时政","体育","游戏","娱乐"],
                         "y": list(range(10))})
df_raw = pd.merge(df_raw, df_label, on="label", how="left")

train_data, val_data, test_data = split_dataset(df_raw)

def convert_example_to_feature(review):
    return tokenizer.encode_plus(review,
                                 add_special_tokens=True,     # add [CLS], [SEP]
                                 max_length=30,               # max length of the text that can go to BERT
                                 padding='max_length',        # add [PAD] tokens
                                 return_attention_mask=True,  # add attention mask to not focus on pad tokens
                                 truncation=True)

# map to the expected input to TFBertForSequenceClassification
def map_example_to_dict(input_ids, attention_masks, token_type_ids, label):
    return {
        "input_ids": input_ids,
        "token_type_ids": token_type_ids,
        "attention_mask": attention_masks,
    }, label

def encode_examples(ds, limit=-1):
    # prepare lists, so that we can build up the final TensorFlow dataset from slices
    input_ids_list = []
    token_type_ids_list = []
    attention_mask_list = []
    label_list = []
    if limit > 0:
        ds = ds.take(limit)
    for index, row in ds.iterrows():
        review = row["text"]
        label = row["y"]
        bert_input = convert_example_to_feature(review)
        input_ids_list.append(bert_input['input_ids'])
        token_type_ids_list.append(bert_input['token_type_ids'])
        attention_mask_list.append(bert_input['attention_mask'])
        label_list.append([label])
    # try:
    #     tf.data.Dataset.from_tensor_slices((input_ids_list, attention_mask_list, token_type_ids_list, label_list)).map(map_example_to_dict)
    # except:
    #     print(review)
    #     print(bert_input)
    #     exit()
    return tf.data.Dataset.from_tensor_slices(
        (input_ids_list, attention_mask_list, token_type_ids_list, label_list)
    ).map(map_example_to_dict)

print("Encoding process")
# train dataset
ds_train_encoded = encode_examples(train_data).shuffle(10000).batch(batch_size)
print(ds_train_encoded)
# val dataset
ds_val_encoded = encode_examples(val_data).batch(batch_size)
# test dataset
ds_test_encoded = encode_examples(test_data).batch(batch_size)

# recommended learning rates for Adam: 5e-5, 3e-5, 2e-5
learning_rate = 2e-5
# multiple epochs might be better as long as we do not overfit the model
number_of_epochs = 8

# model initialization
model = TFBertForSequenceClassification.from_pretrained('bert-base-chinese', num_labels=10)

# optimizer: Adam recommended
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate, epsilon=1e-08, clipnorm=1)

# we do not have one-hot vectors, so we use sparse categorical cross entropy and accuracy
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')
model.compile(optimizer=optimizer, loss=loss, metrics=[metric])

# fit model
print("Training process")
bert_history = model.fit(ds_train_encoded, epochs=number_of_epochs, validation_data=ds_val_encoded)
# evaluate test set
model.evaluate(ds_test_encoded)
```

Comments:

1. I fixed it by removing `clipnorm=1` from the optimizer, but I still don't understand why.
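
The stack trace points at an `Adam/clip_by_norm_*/Sum` node, i.e. the failure happens inside the optimizer's per-variable gradient clipping, which matches the observation that dropping `clipnorm` makes the error go away. Below is a minimal sketch of that workaround, plus an alternative using `global_clipnorm`; note that `global_clipnorm` is an assumption on my part (it requires a reasonably recent TensorFlow, roughly 2.4+) and is not part of the original post. The sketch reuses `model`, `loss`, `metric`, and the encoded datasets defined in the question's code.

```
import tensorflow as tf

# Workaround from the comment: build the optimizer without per-variable norm clipping
optimizer = tf.keras.optimizers.Adam(learning_rate=2e-5, epsilon=1e-08)

# Alternative sketch (assumes TF >= 2.4): clip by the global norm of all gradients instead,
# which avoids the per-variable Adam/clip_by_norm_*/Sum ops seen in the stack trace
# optimizer = tf.keras.optimizers.Adam(learning_rate=2e-5, epsilon=1e-08, global_clipnorm=1.0)

model.compile(optimizer=optimizer, loss=loss, metrics=[metric])
model.fit(ds_train_encoded, epochs=number_of_epochs, validation_data=ds_val_encoded)
```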