Cannot convert numpy data to a GPU tensor in TensorFlow2

#python #gpu #tensorflow2.0 #tensorflow2

Question:

tensorflow2.1 python3.7

I recently switched my deep learning tools from PyTorch to TensorFlow2.

When I build an LSTM-based regression model, something strange happens: I cannot get the numpy data that I feed through tf.data.Dataset converted into GPU tensors.
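
For context, this is roughly how I sanity-check the GPU from Python (a minimal sketch, not part of my training script):

import tensorflow as tf

# List the GPUs TensorFlow can see; an empty list means everything runs on CPU.
print(tf.config.list_physical_devices('GPU'))

# Every eager tensor reports the device it was placed on.
x = tf.constant([[1, 2, 3, 4]])
print(x.device)  # e.g. /job:localhost/replica:0/task:0/device:GPU:0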

Here is the code:

import logging

import numpy as np
import tensorflow as tf

# a small sample of my data
data=[[1,2,3,4],[1,3,2,1]]
labels=[[0.1,0.2,0.3,0.4],[0.5,0.5,0.6,0.5]]

# then I build a dataset
class TermWeightDataset(object):
    """Dataset generator 方式读取"""
    def __init__(self,data_file,vocab,batch_size=50,max_count=100):
        self.vocab = vocab
        self.batch_size = batch_size
        # read_data (defined elsewhere in my project) returns lists plus the max length
        self.data, self.labels, self.max_count = read_data(data_file, self.vocab, True)
        # max_count can also be capped manually
        self.max_count = min(self.max_count,max_count)
        self.data = tf.keras.preprocessing.sequence.pad_sequences(self.data,maxlen=self.max_count,padding='post')
        self.labels = tf.keras.preprocessing.sequence.pad_sequences(self.labels, maxlen=self.max_count, padding='post', value=0.0, dtype="float32")
        # print(self.max_count)

    def generate(self):
        for example,label in zip(self.data,self.labels):
            yield tf.identity(example),tf.identity(label)

    def create_datasets(self):
        return tf.data.Dataset.from_generator(self.generate,(tf.int64,tf.float32)).padded_batch(self.batch_size,padded_shapes=(self.max_count,self.max_count))
        # return tf.data.Dataset.from_tensor_slices((self.data,self.labels)).batch(self.batch_size,drop_remainder=True)


# model
class LSTMBasedModel(tf.keras.Model):
    def __init__(self,vocab_size,
                      input_dim, 
                      hidden_dim,
                      output_dim,
                      embedding_matrix=None
                    ):
        super(LSTMBasedModel,self).__init__()
        self.vocab_size = vocab_size
        self.input_dim = input_dim 
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        if isinstance(embedding_matrix,np.ndarray):
            emb_init = tf.keras.initializers.Constant(embedding_matrix)
            self.embedding = tf.keras.layers.Embedding(self.vocab_size,self.input_dim,embeddings_initializer=emb_init) #trainable=False
        else:
            self.embedding = tf.keras.layers.Embedding(self.vocab_size, self.input_dim)
        self.rnn = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(hidden_dim, return_sequences=False))  # return only the final hidden state
        self.dense = tf.keras.layers.Dense(units = self.output_dim)

    def call(self,inputs):
        tensor = self.embedding(inputs)
        state = self.rnn(tensor)
        out = self.dense(state)
        return out
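
Note the commented-out from_tensor_slices variant in create_datasets; a minimal standalone sketch of that alternative pipeline (toy arrays stand in for the output of read_data) looks like this:

import numpy as np
import tensorflow as tf

# Toy stand-ins for the padded data/labels built in __init__.
data = np.array([[1, 2, 3, 4], [1, 3, 2, 1]], dtype=np.int64)
labels = np.array([[0.1, 0.2, 0.3, 0.4], [0.5, 0.5, 0.6, 0.5]], dtype=np.float32)

# from_tensor_slices ingests the whole arrays as tensors up front, so batches
# do not go through a per-element Python generator on every step.
dataset = tf.data.Dataset.from_tensor_slices((data, labels)).batch(2, drop_remainder=True)
for x, y in dataset:
    print(x.shape, y.shape, x.device)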

Then I train my model in eager mode; the training code looks like this:

def train_one_step(model, inputs_batch, labels_batch, loss_function, optimizer):
    with tf.GradientTape() as tape:
        logits = model(inputs_batch)
        loss = loss_function(logits,labels_batch)
        loss = tf.reduce_mean(loss)
    grads = tape.gradient(loss,model.variables)
    optimizer.apply_gradients(grads_and_vars=zip(grads,model.variables))
    return logits,loss

def train(model,dataset,epochs,loss_function,optimizer,ckpt,writer):
    step = 0
    for epoch in range(epochs):
        min_loss = float("inf")
        all_loss = 0
        size = 0
        for idx, (inputs_batch,labels_batch) in enumerate(dataset):
            # print(inputs_batch)
            inputs_batch = tf.identity(inputs_batch)
            labels_batch = tf.identity(labels_batch)
            logits,loss = train_one_step(model,inputs_batch,labels_batch,loss_function,optimizer)
            step += 1
            size += 1
            all_loss += loss.numpy()
            if step % 100 == 0:
                logging.info("epoch:{}, step:{}, loss:{:.4f}".format(epoch,step,loss))
                with writer.as_default():
                    tf.summary.scalar("batch loss",loss,step=step)
                if min_loss>loss:
                    min_loss = loss 
                    ckpt.save()
                    logging.info("model save:{}".format(step))
        with writer.as_default():
            logging.info("epoch:{},  epoch loss:{:.4f}".format(epoch,all_loss/size))
            tf.summary.scalar("epoch loss",all_loss/size,step=epoch)
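
For completeness, the ckpt and writer passed to train() are created elsewhere; presumably something like this, assuming model and optimizer are already constructed (paths match my run.sh flags):

import tensorflow as tf

# TensorBoard writer and a checkpoint manager covering model + optimizer.
writer = tf.summary.create_file_writer("log/")
checkpoint = tf.train.Checkpoint(model=model, optimizer=optimizer)
# CheckpointManager.save() needs no arguments, matching the ckpt.save() call above.
ckpt = tf.train.CheckpointManager(checkpoint, "ckpt/", max_to_keep=3)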

Then my code runs, but it always executes on the CPU instead of the GPU, and GPU utilization stays at 0.
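
To see which device each op actually lands on, I can turn on device placement logging before anything else runs (a standard TF2 switch; a sketch, this is not in my script yet):

import tensorflow as tf

# Print the chosen device for every executed op; call this before any op runs.
tf.debugging.set_log_device_placement(True)

# A simple matmul as a probe: the log line should mention device:GPU:0.
a = tf.random.uniform((2, 2))
print(tf.matmul(a, a))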

But when I feed in some random data (np.random.rand(100,22)), the GPU does get used.
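
That quick test looked roughly like this (shape (100, 22) as above; I use integer ids here so the embedding lookup type-checks, and the labels are a hypothetical match):

import numpy as np
import tensorflow as tf

# Random stand-in batch: integer token ids plus float labels of the same shape.
fake_data = np.random.randint(0, 1000, size=(100, 22)).astype(np.int64)
fake_labels = np.random.rand(100, 22).astype(np.float32)
dataset = tf.data.Dataset.from_tensor_slices((fake_data, fake_labels)).batch(10)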

Sorry for the missing log information.

Update:

 2020-09-09 15:41:20.795645: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1
2020-09-09 15:41:25.905156: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libnvinfer.so.6
2020-09-09 15:41:25.935928: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could not load dynamic library 'libnvinfer_plugin.so.6'; dlerror: libnvrtc.so.10.2: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/usr/local/nvidia/lib64/:/home/hdp-map/cuda-10.0/lib64
2020-09-09 15:41:25.935988: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:30] Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.
2020-09-09 15:41:32.316385: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1
2020-09-09 15:41:32.384586: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1555] Found device 0 with properties: 
pciBusID: 0000:86:00.0 name: Tesla K80 computeCapability: 3.7
coreClock: 0.8235GHz coreCount: 13 deviceMemorySize: 11.92GiB deviceMemoryBandwidth: 223.96GiB/s
2020-09-09 15:41:32.384654: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1
2020-09-09 15:41:32.384705: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
2020-09-09 15:41:32.502781: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10
2020-09-09 15:41:32.738911: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10
2020-09-09 15:41:32.932498: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10
2020-09-09 15:41:33.036902: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10
2020-09-09 15:41:33.037047: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7
2020-09-09 15:41:33.051746: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1697] Adding visible gpu devices: 0
2020-09-09 15:41:33.171633: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2399770000 Hz
2020-09-09 15:41:33.173189: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x562238f2c200 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2020-09-09 15:41:33.173261: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
2020-09-09 15:41:33.272368: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x56223790cb60 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2020-09-09 15:41:33.272459: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Tesla K80, Compute Capability 3.7
2020-09-09 15:41:33.274399: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1555] Found device 0 with properties: 
pciBusID: 0000:86:00.0 name: Tesla K80 computeCapability: 3.7
coreClock: 0.8235GHz coreCount: 13 deviceMemorySize: 11.92GiB deviceMemoryBandwidth: 223.96GiB/s
2020-09-09 15:41:33.274461: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1
2020-09-09 15:41:33.274494: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcublas.so.10
2020-09-09 15:41:33.274533: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcufft.so.10
2020-09-09 15:41:33.274559: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcurand.so.10
2020-09-09 15:41:33.274584: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusolver.so.10
2020-09-09 15:41:33.274618: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcusparse.so.10
2020-09-09 15:41:33.274642: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudnn.so.7
2020-09-09 15:41:33.277794: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1697] Adding visible gpu devices: 0
2020-09-09 15:41:33.277896: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1
2020-09-09 15:41:39.052053: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1096] Device interconnect StreamExecutor with strength 1 edge matrix:
2020-09-09 15:41:39.052127: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1102]      0 
2020-09-09 15:41:39.052147: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] 0:   N 
2020-09-09 15:41:39.068973: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1241] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 11483 MB memory) -> physical GPU (device: 0, name: Tesla K80, pci bus id: 0000:86:00.0, compute capability: 3.7)
run.sh: line 13: 21205 Killed                  /opt/conda/bin/python3 main.py --data data/ --vocab misc/vocab.txt --tensorboard_dir log/ --batch_size 64 --embedding_path misc/glove.vec.txt --lr 0.0001 --output_dir ckpt/ --epochs 20 --gpus 0

Comments:

1. Can you post the full log output of your program as it runs? (All those info messages from tensorflow may contain useful clues.)

2. I have updated the post with the full log.