#tensorflow #synchronization #gpu
Question:
#tokenizer definition - built from the vocab
from bert.tokenization.albert_tokenization import FullTokenizer
def createTokenizer():
    return FullTokenizer("../albert_base/assets/30k-clean.vocab", spm_model_file="../albert_base/assets/30k-clean.model", do_lower_case=True)
def get_masks(tokens, max_seq_length):
    """Mask for padding"""
    if len(tokens) > max_seq_length:
        # Cut off the excess length
        tokens = tokens[0:max_seq_length]
        return [1] * len(tokens)
    else:
        return [1] * len(tokens) + [0] * (max_seq_length - len(tokens))
def get_segments(tokens, max_seq_length):
    """Segment ids: 0 for the first sequence, 1 after the first [SEP]"""
    if len(tokens) > max_seq_length:
        # Cut off the excess length
        tokens = tokens[:max_seq_length]
        segments = []
        current_segment_id = 0
        for token in tokens:
            segments.append(current_segment_id)
            if token == "[SEP]":
                current_segment_id = 1
        return segments
    else:
        segments = []
        current_segment_id = 0
        for token in tokens:
            segments.append(current_segment_id)
            if token == "[SEP]":
                current_segment_id = 1
        return segments + [0] * (max_seq_length - len(tokens))
def get_ids(tokens, tokenizer, max_seq_length):
    """Token ids from the tokenizer's vocab, padded up to max_seq_length"""
    if len(tokens) > max_seq_length:
        tokens = tokens[:max_seq_length]
        return tokenizer.convert_tokens_to_ids(tokens)
    else:
        token_ids = tokenizer.convert_tokens_to_ids(tokens)
        return token_ids + [0] * (max_seq_length - len(token_ids))
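# Editor's sketch: a quick sanity check of the three helpers on toy token
# lists (hypothetical inputs; masks and segments do not need the tokenizer):
assert get_masks(["[CLS]", "hi", "[SEP]"], 5) == [1, 1, 1, 0, 0]
assert get_segments(["[CLS]", "a", "[SEP]", "b", "[SEP]"], 6) == [0, 0, 0, 1, 1, 0]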
tokenizer = createTokenizer()
max_seq_length = 2 #This number will determine the number of tokens
def prep(s, get='id'):
    stokens = tokenizer.tokenize(s)
    stokens = ["[CLS]"] + stokens + ["[SEP]"]
    if get == 'id':
        return get_ids(stokens, tokenizer, max_seq_length)
    elif get == 'mask':
        return get_masks(stokens, max_seq_length)
    else:
        return get_segments(stokens, max_seq_length)
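# Editor's sketch: example calls of prep(), assuming the ALBERT vocab/model
# files referenced above exist; note that with max_seq_length = 2 every
# sentence is cut down to "[CLS]" plus a single word piece:
print(prep("hello world"))                 # token ids, truncated to length 2
print(prep("hello world", get='mask'))     # attention mask, e.g. [1, 1]
print(prep("hello world", get='segment'))  # segment ids, e.g. [0, 0]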
#load the train and test data
import pandas as pd
import pickle
train_set = pd.read_csv("../goemotion/train_set.csv")
test_set = pd.read_csv("../goemotion/test_set.csv")
train_X = [prep(sentence) for sentence in train_set["text"]]
#with open("train" + str(max_seq_length) + ".pickle", 'rb') as f:
#    train_X = pickle.load(f)
train_Y = list(map(int, train_set["emotion"].tolist()))
test_X = [prep(sentence) for sentence in test_set["text"]]
#with open("test" + str(max_seq_length) + ".pickle", 'rb') as f:
#    test_X = pickle.load(f)
test_Y = list(map(int, test_set["emotion"].tolist()))
with open("train" + str(max_seq_length) + ".pickle", 'wb') as f:
    pickle.dump(train_X, f)
with open("test" + str(max_seq_length) + ".pickle", 'wb') as f:
    pickle.dump(test_X, f)
print("data preprocess finished")
#load the albert model
import os
import bert
import tensorflow as tf
"""
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
session = tf.compat.v1.Session(config=config)
"""
tf.config.experimental.set_memory_growth(tf.config.experimental.list_physical_devices("GPU")[0], True)
'''with tf.device("CPU"):
    train_X = tf.identity(train_X)
    train_Y = tf.identity(train_Y)
    test_X = tf.identity(test_X)
    test_Y = tf.identity(test_Y)'''
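# Editor's sketch: an alternative to the single-GPU memory-growth line above
# (assumption: guard against machines where no GPU is visible, and apply the
# setting to every detected device; harmless if run after the line above):
for gpu in tf.config.experimental.list_physical_devices("GPU"):
    tf.config.experimental.set_memory_growth(gpu, True)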
#model loading, attempt no. 1
model_name = "albert_base_v2"
#model_dir = bert.fetch_google_albert_model(model_name, ".models")
model_ckpt = os.path.join("../albert_base", "model.ckpt-best")
model_params = bert.albert_params("../albert_base/")
albert_layer = bert.BertModelLayer.from_params(model_params, name="albert")
model_layer = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(max_seq_length,), dtype="int32", name="input_ids"),
    albert_layer,
    #tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(112, activation=tf.nn.relu),
    tf.keras.layers.Dense(27, activation=tf.nn.softmax),  #0~27
    tf.keras.layers.Dense(1, activation=tf.nn.softmax)
])
model_layer.build(input_shape=(None, max_seq_length))
bert.load_albert_weights(albert_layer, model_ckpt)
#
model_layer.compile(loss="sparse_categorical_crossentropy", optimizer=tf.optimizers.Adam(lr=0.00001), metrics=["sparse_categorical_accuracy"])
print(model_layer.summary())
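# Editor's note, an assumption about bert-for-tf2: BertModelLayer returns
# per-token hidden states of shape (batch, max_seq_length, hidden_size), so
# without the commented-out Flatten() the Dense layers act on every token
# position; printing the model's output shape makes this visible:
print(model_layer.output_shape)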
#훈련 시작
checkpointName = os.path.join("../albert_base/models/", "albert_faq.ckpt")
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpointName,
                                                 save_weights_only=True,
                                                 verbose=1)
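# Editor's sketch, not from the original post: converting the Python lists to
# numpy arrays before fit() is a common hygiene step (an assumption here, not
# a confirmed fix for the error reported below):
import numpy as np
train_X = np.asarray(train_X, dtype="int32")
train_Y = np.asarray(train_Y, dtype="int32")
test_X = np.asarray(test_X, dtype="int32")
test_Y = np.asarray(test_Y, dtype="int32")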
history = model_layer.fit(
    test_X,
    test_Y,
    epochs=300,
    validation_data=(train_X, train_Y),
    verbose=1,
    callbacks=[cp_callback],
    batch_size=2)
The above is my code, and below is my nvidia-smi output:
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 455.23.04    Driver Version: 455.23.04    CUDA Version: 11.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|===============================+======================+======================|
|   0  GeForce RTX 3090    On   | 00000000:09:00.0  On |                  N/A |
| 33%   53C    P2   111W / 350W |   1016MiB / 24265MiB |      1%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
I am using tensorflow-gpu 2.2 with CUDA toolkit 10.1 and cuDNN 7.6.
My machine is a 3900X with 128 GB of RAM, an RTX 3090, and a 500 GB SSD.
Running the code above produces the error message below:
File "/home/sentiment/anaconda3/envs/mybert/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py", line 6606, in raise_from_not_ok_status
six.raise_from(core._status_to_exception(e.code, message), None)
File "<string>", line 3, in raise_from
tensorflow.python.framework.errors_impl.InternalError: Failed copying input tensor from /job:localhost/replica:0/task:0/device:GPU:0 to /job:localhost/replica:0/task:0/device:CPU:0 in order to run Identity: GPU sync failed [Op:Identity]
I want to fine-tune ALBERT.
With the CPU build of tensorflow it runs fine, but training takes 6 hours per epoch,
so I would like to use the GPU.
I have tried hard to find a solution but have not succeeded.
Does anyone know how to fix this error?
Comments:
1. The line
with tf.device("CPU"):
looks strange. Why are you using the CPU when training on the GPU?
2. @Andrey Oh, that is there by mistake. Someone said that code works, but it doesn't.