Ошибка TensorFlow CustomModel (Ошибка компиляции: Несовместимые формы: [128,80,1] против [128,80]) (Двунаправленный слой)

#python #tensorflow

Вопрос:

По некоторым причинам мне нужно использовать CustomModel в TensorFlow. При использовании высокоуровневого API модель работает отлично, однако когда я пытаюсь использовать тот же класс CustomModel из руководства на сайте TensorFlow, но с двунаправленными (Bidirectional) слоями, я получаю следующую ошибку:

Ошибка компиляции: Несовместимые формы: [128,80,1] против [128,80]

 class CustomModel(keras.Model):
    """Keras model with hand-written training and evaluation steps (MAE loss).

    The running loss and MAE are accumulated in the module-level
    ``loss_tracker`` and ``mae_metric`` objects, which must be created
    before ``fit``/``evaluate`` is called.
    """

    @staticmethod
    def _align_targets(y, y_pred):
        """Return ``y`` with a trailing unit axis when ``y_pred`` has one.

        A final ``Dense(1)`` layer yields predictions shaped ``(..., 1)``
        while targets are often supplied without that last axis.
        ``keras.losses.mean_absolute_error`` subtracts the two tensors
        directly, so e.g. ``[128, 80]`` targets against ``[128, 80, 1]``
        predictions fail with "Incompatible shapes".
        """
        y_rank, pred_rank = y.shape.rank, y_pred.shape.rank
        if (
            y_rank is not None
            and pred_rank is not None
            and pred_rank == y_rank + 1
            and y_pred.shape[-1] == 1
        ):
            return tf.expand_dims(y, axis=-1)
        return y

    def train_step(self, data):
        """Run one optimisation step on a batch and return the running metrics.

        Args:
            data: An ``(x, y)`` pair of inputs and targets.

        Returns:
            Dict with the running ``"loss"`` and ``"mae"`` values.
        """
        x, y = data

        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)  # Forward pass
            y = self._align_targets(y, y_pred)
            # Compute our own loss
            loss = keras.losses.mean_absolute_error(y, y_pred)

        # Compute gradients
        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)

        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))

        # Compute our own metrics
        loss_tracker.update_state(loss)
        mae_metric.update_state(y, y_pred)
        return {"loss": loss_tracker.result(), "mae": mae_metric.result()}

    def test_step(self, data):
        """Evaluate one batch with the same hand-computed loss as training.

        Defined so that validation feeds ``loss_tracker``/``mae_metric`` too;
        otherwise ``val_loss``/``val_mae`` would not reflect this loss.

        Args:
            data: An ``(x, y)`` pair of inputs and targets.

        Returns:
            Dict with the running ``"loss"`` and ``"mae"`` values.
        """
        x, y = data

        y_pred = self(x, training=False)
        y = self._align_targets(y, y_pred)
        loss = keras.losses.mean_absolute_error(y, y_pred)

        loss_tracker.update_state(loss)
        mae_metric.update_state(y, y_pred)
        return {"loss": loss_tracker.result(), "mae": mae_metric.result()}

    @property
    def metrics(self):
        """Trackers Keras resets at the start of every epoch/evaluation."""
        return [loss_tracker, mae_metric]

 
 n_splits = 5
BATCH_SIZE=1024
gkf = model_selection.GroupKFold(n_splits=5)
scores_train = []
scores_valid = []

preds_valid_array = np.zeros(X.shape[0])
preds_test_array = np.zeros(test.shape[0])

for fold, (train_idx, valid_idx) in enumerate(gkf.split(X, y, groups=groups)):
        
    print(f"Fold {fold 1} -------------->")
    x_train, y_train = X[train_idx].reshape(-1, 80, X.shape[1]), y[train_idx].reshape(-1, 80, )
    x_valid, y_valid = X[valid_idx].reshape(-1, 80, X.shape[1]), y[valid_idx].reshape(-1, 80, )
    
    with tpu_strategy.scope():
        inputs = keras.Input(shape=(80, X.shape[1])) 
        x = keras.layers.Bidirectional(keras.layers.LSTM(1024, return_sequences=True))(inputs)
        x = keras.layers.Bidirectional(keras.layers.LSTM(512, return_sequences=True))(x)
        x = keras.layers.Bidirectional(keras.layers.LSTM(256, return_sequences=True))(x)
        x = keras.layers.Bidirectional(keras.layers.LSTM(128, return_sequences=True))(x)
        x = keras.layers.Bidirectional(keras.layers.LSTM(64, return_sequences=True))(x)
        x = keras.layers.Dense(64)(x)
        x = keras.activations.selu(x)
        x = keras.layers.Dense(32)(x)
        x = keras.activations.selu(x)
        outputs = keras.layers.Dense(1)(x)
        
        model = CustomModel(inputs, outputs)

        model.compile(optimizer="adam")
        
        loss_tracker = keras.metrics.Mean(name="loss")
        mae_metric = keras.metrics.MeanAbsoluteError(name="mae")
    
    filepath = f'./model_fold{fold 1}.hdf5'
    
    
    checkpoint = ModelCheckpoint(filepath=filepath, 
                             monitor='val_loss',
                             verbose=1, 
                             save_best_only=True,
                             mode='min')
    
    lr = ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=10, verbose=1)
    
    es = EarlyStopping(monitor="val_loss", patience=40, verbose=1, mode="min", restore_best_weights=True)

    checkpoint_filepath = f"folds{fold}.hdf5"
    sv = keras.callbacks.ModelCheckpoint(
        checkpoint_filepath, monitor='val_loss', verbose=1, save_best_only=True,
        save_weights_only=False, mode='auto', save_freq='epoch',
        options=None
    )

    model.fit(x_train, y_train, validation_data=(x_valid, y_valid), epochs=300, batch_size=BATCH_SIZE, callbacks=[lr, es, sv])
    
    #model = load_model(filepath)
    
    preds_train = model.predict(x_train.reshape(-1, 80, X.shape[1])).squeeze().reshape(-1, 1).squeeze()
    preds_valid = model.predict(x_valid.reshape(-1, 80, X.shape[1])).squeeze().reshape(-1, 1).squeeze()
    preds_test = model.predict(test.reshape(-1, 80, test.shape[1])).squeeze().reshape(-1, 1).squeeze()

    preds_valid_array[valid_idx]  = preds_valid
    preds_test_array  = preds_test / n_splits
        
    score_train = metrics.mean_absolute_error(y_train.reshape(-1,), preds_train)
    score_valid = metrics.mean_absolute_error(y_valid.reshape(-1,), preds_valid)
    print(score_valid)
    scores_train.append(score_train)
    scores_valid.append(score_valid)
    
print('Mean train score =', np.mean(scores_train), 'STD train =', np.std(scores_train, ddof=1))
print('Mean valid score =', np.mean(scores_valid), 'STD valid =', np.std(scores_valid, ddof=1))
 

Комментарии:

1. Какую форму имеет X? И не могли бы вы, пожалуйста, добавить полное сообщение об ошибке?