Стек вызовов функций: train_function, распознавание цвета транспортного средства

#python #tensorflow #deep-learning

Вопрос:

У меня есть эта модель VGG 16:

 def vgg16(trainImages, trainLabels, testImages, testLabels, batch_size = 8):
    """Build a VGG-16 style network, train it and print test-set predictions.

    The network is a Sequential stack of five convolutional blocks
    (2, 2, 3, 3 and 3 stacked 3x3 "same"-padded ReLU convolutions with
    64, 128, 256, 512 and 512 filters, each block followed by 2x2 max
    pooling), followed by Flatten, two 4096-unit ReLU dense layers and a
    9-unit softmax output. It is compiled with Adam (learning rate 0.001)
    and sparse categorical cross-entropy, then trained for 7 epochs.

    Args:
        trainImages: Training images, convertible to an array; the first
            layer declares input_shape (224, 224, 3) per sample.
        trainLabels: Training labels, convertible to an array. The sparse
            categorical loss presumably expects integer class indices in
            0..8 -- verify against the caller.
        testImages: Images to run prediction on after training.
        testLabels: Test labels. Converted to an array for parity with the
            other inputs but not otherwise used here.
        batch_size: Number of samples per training/prediction step. Keras
            defaults to 32, which made the first-block activation tensor
            of shape [32, 64, 224, 224] exhaust GPU memory (ResourceExhausted
            / OOM). A smaller batch shrinks every activation tensor
            proportionally. NOTE(review): on very small GPUs the weights and
            optimizer state alone may still not fit -- lower this further or
            use a smaller model if the OOM persists.

    Returns:
        The array returned by model.predict(testImages) (also printed).

    Raises:
        ValueError: If batch_size is less than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # Keras wants array inputs; plain Python lists of images are converted here.
    trainImages = np.asarray(trainImages)
    trainLabels = np.asarray(trainLabels)
    testImages = np.asarray(testImages)
    testLabels = np.asarray(testLabels)

    # (filters, number of stacked 3x3 convolutions) for each of the five blocks.
    conv_blocks = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))

    model = Sequential()

    is_first_layer = True
    for filters, repeats in conv_blocks:
        for _ in range(repeats):
            # Only the very first layer declares the input shape.
            extra = {"input_shape": (224, 224, 3)} if is_first_layer else {}
            model.add(Conv2D(filters = filters, kernel_size = (3, 3), padding = "same", activation = "relu", **extra))
            is_first_layer = False

        # Each block ends by halving the spatial resolution.
        model.add(MaxPool2D(pool_size = (2, 2), strides = (2, 2)))

    model.add(Flatten())

    model.add(Dense(units = 4096, activation = "relu"))
    model.add(Dense(units = 4096, activation = "relu"))

    # One output unit per class.
    model.add(Dense(units = 9, activation = "softmax"))

    opt = Adam(learning_rate = 0.001)

    model.compile(optimizer = opt, loss = keras.losses.sparse_categorical_crossentropy, metrics = ['accuracy'])

    # An explicit batch size keeps per-step activation memory bounded.
    model.fit(trainImages, trainLabels, epochs = 7, batch_size = batch_size)

    pred = model.predict(testImages, batch_size = batch_size)

    print(pred)

    return pred
 

и я получил эту ошибку

 Epoch 1/7
2021-08-10 11:59:11.946324: I tensorflow/core/common_runtime/bfc_allocator.cc:1066] Stats: 
Limit:                      2247832372
InUse:                      2183536128
MaxInUse:                   2238205184
NumAllocs:                         205
MaxAllocSize:                513546496
Reserved:                            0
PeakReserved:                        0
LargestFreeBlock:                    0

2021-08-10 11:59:11.946718: W tensorflow/core/common_runtime/bfc_allocator.cc:467] ***************************************_********************************************************xxxx
2021-08-10 11:59:11.946908: W tensorflow/core/framework/op_kernel.cc:1767] OP_REQUIRES failed at conv_ops_fused_impl.h:778 : Resource exhausted: OOM when allocating tensor with shape[32,64,224,224] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
Traceback (most recent call last):
  File "C:/Users/razva/OneDrive/Desktop/Vehicle Color Recognition/main.py", line 328, in <module>
    vgg16(trainImages, trainLabels, testImages, testLabels)
  File "C:/Users/razva/OneDrive/Desktop/Vehicle Color Recognition/main.py", line 285, in vgg16
    model.fit(trainImages, trainLabels, epochs = 7)
  File "C:\Users\razva\miniconda3\envs\Vehicle Color Recognition\lib\site-packages\keras\engine\training.py", line 1158, in fit
    tmp_logs = self.train_function(iterator)
  File "C:\Users\razva\miniconda3\envs\Vehicle Color Recognition\lib\site-packages\tensorflow\python\eager\def_function.py", line 889, in __call__
    result = self._call(*args, **kwds)
  File "C:\Users\razva\miniconda3\envs\Vehicle Color Recognition\lib\site-packages\tensorflow\python\eager\def_function.py", line 950, in _call
    return self._stateless_fn(*args, **kwds)
  File "C:\Users\razva\miniconda3\envs\Vehicle Color Recognition\lib\site-packages\tensorflow\python\eager\function.py", line 3023, in __call__
    return graph_function._call_flat(
  File "C:\Users\razva\miniconda3\envs\Vehicle Color Recognition\lib\site-packages\tensorflow\python\eager\function.py", line 1960, in _call_flat
    return self._build_call_outputs(self._inference_function.call(
  File "C:\Users\razva\miniconda3\envs\Vehicle Color Recognition\lib\site-packages\tensorflow\python\eager\function.py", line 591, in call
    outputs = execute.execute(
  File "C:\Users\razva\miniconda3\envs\Vehicle Color Recognition\lib\site-packages\tensorflow\python\eager\execute.py", line 59, in quick_execute
    tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
tensorflow.python.framework.errors_impl.ResourceExhaustedError:  OOM when allocating tensor with shape[32,64,224,224] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
     [[node sequential/conv2d_1/Relu (defined at \Users\razva\miniconda3\envs\Vehicle Color Recognition\lib\site-packages\keras\backend.py:4700) ]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.
 [Op:__inference_train_function_1721]

Errors may have originated from an input operation.
Input Source operations connected to node sequential/conv2d_1/Relu:
 sequential/conv2d_1/BiasAdd (defined at \Users\razva\miniconda3\envs\Vehicle Color Recognition\lib\site-packages\keras\layers\convolutional.py:263)

Function call stack:
train_function
 

Форма входных данных — (224, 224, 3). Я использовал np.array в первых четырёх строках, потому что иначе получал другую ошибку. Мне нужно распознавать 9 классов. Как устранить эту ошибку и почему она возникает? Я установил CUDA — может быть, проблема в этом?
Характеристики системы: Windows 10, GTX 1650 4 ГБ

Комментарии:

1. Это ошибка OOM (Out Of Memory, нехватка памяти). Честно говоря, даже если вы каким-то образом исправите её, вас всё равно будут беспокоить очень медленное обучение и плохие результаты из-за маленького размера батча. Попробуйте обучать модель на онлайн-платформе Google Colab: она полностью бесплатна и предоставляет графические процессоры с памятью около 16 ГБ. Там вы сможете обучать такие небольшие модели гораздо быстрее и с большим размером батча.