#python #tensorflow #keras
Question:
I have a model defined as follows:
"""
Siamese model with contrastive loss, based on:
https://keras.io/examples/vision/siamese_contrastive/
"""
import os
from typing import Optional
import model.metrics as metrics
import numpy as np
import tensorflow as tf
from model.train_pairs.data_generator import get_train_generator, get_valid_generator
from model.training import get_cyclical_lr, get_augmentation_layer
from tensorflow.keras import Model
from tensorflow.keras import backend as K
from tensorflow.keras import layers
from tensorflow.keras.applications import EfficientNetB0, ResNet50
from tensorflow.keras.optimizers import Adam
def l1_distance(vects: tuple[np.ndarray, np.ndarray]) -> tf.Tensor:
"""
Finds the L1 distance between two vectors.
Args:
vects: List containing two tensors of same length.
Returns:
Element-wise L1 distance.
"""
x, y = vects
return K.abs(x - y)
def loss(margin: Optional[int] = 1):
"""
Provides @contrastive_loss an enclosing scope with variable 'margin'.
Args:
margin: Defines the baseline for distance for which pairs
should be classified as dissimilar. - (default is 1).
Returns:
@contrastive_loss function with data ('margin') attached.
"""
def contrastive_loss(y_true: list[int], y_pred: list[int]) -> float:
"""
Calculates the contrastive loss:
loss = mean( (1 - true_value) * square(prediction) + true_value * square( max(margin - prediction, 0) ) )
Args:
y_true: List of labels.
y_pred: List of predictions of same length as of y_true.
Returns:
Contrastive loss as floating point value.
"""
square_pred = tf.math.square(y_pred)
margin_square = tf.math.square(tf.math.maximum(margin - (y_pred), 0))
return tf.math.reduce_mean(
(1 - y_true) * square_pred + y_true * margin_square
)
return contrastive_loss
def create_model(
target_shape: Optional[tuple[int, int, int]] = (224, 224, 3),
path: Optional[str] = None,
) -> Model:
"""
Creates the siamese model.
Args:
target_shape: image dimensions.
path: path to best weights.
Returns:
Siamese model.
"""
input_1 = layers.Input(shape=target_shape, name="inp_1")
input_2 = layers.Input(shape=target_shape, name="inp_2")
# input_1aug = img_augmentation(input_1)
# input_2aug = img_augmentation(input_2)
input = layers.Input(shape=target_shape, name="input")
lambda_1 = layers.Lambda(
lambda image: tf.keras.applications.resnet.preprocess_input(image),
name="pre_process",
)(input)
base_cnn = ResNet50(
weights="imagenet",
input_tensor=lambda_1,
input_shape=target_shape,
include_top=False,
)
# CONV/FC -> BatchNorm -> ReLu(or other activation) -> Dropout -> CONV/FC ->
pool = layers.MaxPooling2D(pool_size=(2, 2))(base_cnn.output)
flatten = layers.Flatten(name="base_output_flatten")(pool)
dense1 = layers.BatchNormalization(name="dense1_norm")(flatten)
dense1 = layers.Dense(512, activation="relu", name="dense1")(dense1)
dense1 = layers.Dropout(0.3, name="dense1_dropout")(dense1)
dense2 = layers.BatchNormalization(name="dense2_norm")(dense1)
dense2 = layers.Dense(256, activation="relu", name="dense2")(dense2)
dense2 = layers.Dropout(0.2, name="dense2_dropout")(dense2)
output = layers.Dense(256, name="dense_output")(dense2)
embedding = Model(input, output, name="Embedding")
trainable = False
for layer in base_cnn.layers:
if layer.name == "conv5_block1_out":
trainable = True
layer.trainable = trainable
tower_1 = embedding(input_1)
tower_2 = embedding(input_2)
merge_layer = layers.Lambda(l1_distance, name="l1")([tower_1, tower_2])
normal_layer = tf.keras.layers.BatchNormalization(name="l1_norm")(merge_layer)
comparison_layer = layers.Dense(
1,
activation="sigmoid",
name="final_layer",
)(normal_layer)
siamese = Model(inputs=[input_1, input_2], outputs=comparison_layer)
if path is not None:
siamese.load_weights(path)
return siamese
def train(
split_path: str,
target_shape: Optional[tuple[int, int, int]] = (224, 224, 3),
epochs: Optional[int] = 100,
batch_size: Optional[int] = 16,
margin: Optional[int] = 1,
) -> None:
"""
Starts training the siamese model.
Args:
split_path: path to split file.
target_shape: image dimensions.
epochs: number of epochs.
batch_size: batch size.
margin: margin for contrastive loss.
"""
early_stopping_callback = tf.keras.callbacks.EarlyStopping(
monitor="loss", patience=5
)
tensorboard_callback = tf.keras.callbacks.TensorBoard(
log_dir="/logs", histogram_freq=1
)
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
filepath="/logs/weights{epoch:04d}.tf", save_weights_only=True, save_freq=1
)
train_generator = get_train_generator(
split_path, batch_size=batch_size, input_size=target_shape
)
steps_per_epoch = len(train_generator)
clr = get_cyclical_lr(2 * steps_per_epoch)
optimizer = Adam(clr)
siamese = create_model(target_shape)
siamese.compile(
loss=loss(margin=margin),
optimizer=optimizer,
metrics=[metrics.accuracy, metrics.precision, metrics.recall, metrics.f1],
)
siamese.summary()
siamese.fit(
train_generator,
validation_data=get_valid_generator(
split_path, batch_size=batch_size, input_size=target_shape
),
epochs=epochs,
callbacks=[
early_stopping_callback,
tensorboard_callback,
model_checkpoint_callback,
],
verbose=1,
)
def save_model(path: str) -> None:
"""
Saves model with given weights.
Args:
path: path to weights.
"""
siamese = create_model(path=path)
base_dir = os.path.dirname(path)
siamese.save(os.path.join(base_dir, "pairs_siamese.h5"), include_optimizer=False)
As you can see, I saved my model using checkpoints.
When I try to load it, I get:
Traceback (most recent call last):
File "/code/main.py", line 136, in <module>
args.func(args) # call the default function
File "/code/functions.py", line 60, in save_model
save_model_pairs(weights_path)
File "/code/model/train_pairs/model.py", line 200, in save_model
siamese = create_model(path=path)
File "/code/model/train_pairs/model.py", line 130, in create_model
siamese.load_weights(path)
File "/usr/local/lib/python3.9/dist-packages/keras/utils/traceback_utils.py", line 67, in error_handler
raise e.with_traceback(filtered_tb) from None
File "<__array_function__ internals>", line 5, in transpose
File "/usr/local/lib/python3.9/dist-packages/numpy/core/fromnumeric.py", line 660, in transpose
return _wrapfunc(a, 'transpose', axes)
File "/usr/local/lib/python3.9/dist-packages/numpy/core/fromnumeric.py", line 57, in _wrapfunc
return bound(*args, **kwds)
ValueError: axes don't match array
I can find plenty of references to this problem, but I have not been able to fix it for my case. I tried tf-nightly, which was supposed to fix nested models, but that did not work for me. I have been training this model for several weeks, so I am looking for a way to use it without losing everything I have done. If no solution exists, would setting save_weights_only=False solve the problem?
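To make the last question concrete, this is the checkpoint callback change I have in mind for future runs (just a minimal sketch; the filepath without an extension is hypothetical and would make Keras write a full SavedModel instead of only the weights):

model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath="/logs/model{epoch:04d}",  # hypothetical path; no .h5/.tf extension -> SavedModel format
    save_weights_only=False,            # save architecture + weights, not just the weights
    save_freq=1,
)

(This would only apply to future runs; the existing .tf weight checkpoints from the weeks of training would stay as they are.)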