首页 > 解决方案 > 仅保存权重后加载连体模型:轴与数组不匹配

问题描述

我有一个模型定义如下:

"""
Siamese model with contrastive loss, based on:
    https://keras.io/examples/vision/siamese_contrastive/
"""
import os
from typing import Optional

import model.metrics as metrics
import numpy as np
import tensorflow as tf
from model.train_pairs.data_generator import get_train_generator, get_valid_generator
from model.training import get_cyclical_lr, get_augmentation_layer
from tensorflow.keras import Model
from tensorflow.keras import backend as K
from tensorflow.keras import layers
from tensorflow.keras.applications import EfficientNetB0, ResNet50
from tensorflow.keras.optimizers import Adam


def l1_distance(vects: tuple[tf.Tensor, tf.Tensor]) -> tf.Tensor:
    """
    Computes the element-wise L1 distance between two tensors.

    Used as a Keras ``Lambda`` layer merge function, so it receives and
    returns tensors (the previous annotations of ``np.ndarray`` in and
    ``float`` out were inaccurate — ``K.abs`` returns a tensor of the
    same shape as its input).

    Args:
        vects: Tuple of two tensors with identical shapes.

    Returns:
        Tensor of element-wise absolute differences ``|x - y|``.
    """
    x, y = vects
    return K.abs(x - y)


def loss(margin: float = 1):
    """
    Builds a contrastive-loss function with ``margin`` bound in its closure.

    Args:
        margin: Baseline distance above which pairs should be classified
            as dissimilar (default 1).

    Returns:
        A ``contrastive_loss(y_true, y_pred)`` callable suitable for
        ``model.compile(loss=...)``.
    """

    def contrastive_loss(y_true: tf.Tensor, y_pred: tf.Tensor) -> tf.Tensor:
        """
        Contrastive loss:
            mean((1 - y_true) * y_pred^2 + y_true * max(margin - y_pred, 0)^2)

        NOTE(review): with this formula, ``y_true == 0`` is the label whose
        predicted distance is driven towards 0 — confirm this label
        convention matches the pair data generator.

        Args:
            y_true: Tensor of binary labels.
            y_pred: Tensor of predictions, same shape as ``y_true``.

        Returns:
            Scalar contrastive loss.
        """
        square_pred = tf.math.square(y_pred)
        margin_square = tf.math.square(tf.math.maximum(margin - y_pred, 0))
        return tf.math.reduce_mean(
            (1 - y_true) * square_pred + y_true * margin_square
        )

    return contrastive_loss


def create_model(
    target_shape: tuple[int, int, int] = (224, 224, 3),
    path: Optional[str] = None,
) -> Model:
    """
    Creates the siamese model: one shared ResNet50-based embedding tower
    applied to two image inputs, joined by an element-wise L1 distance,
    batch-normalized, and reduced to a single sigmoid similarity score.

    Args:
        target_shape: Image dimensions (height, width, channels).
        path: Optional path to pre-trained weights to load into the
            assembled model.

    Returns:
        The siamese Keras model (uncompiled).
    """
    input_1 = layers.Input(shape=target_shape, name="inp_1")
    input_2 = layers.Input(shape=target_shape, name="inp_2")

    # Shared embedding tower. Renamed from `input` so the builtin is not
    # shadowed; the layer name "input" is kept for weight compatibility.
    embedding_input = layers.Input(shape=target_shape, name="input")
    lambda_1 = layers.Lambda(
        lambda image: tf.keras.applications.resnet.preprocess_input(image),
        name="pre_process",
    )(embedding_input)
    base_cnn = ResNet50(
        weights="imagenet",
        input_tensor=lambda_1,
        input_shape=target_shape,
        include_top=False,
    )
    # Head pattern: pool -> flatten -> (BatchNorm -> Dense -> Dropout) x2 -> Dense
    pool = layers.MaxPooling2D(pool_size=(2, 2))(base_cnn.output)
    flatten = layers.Flatten(name="base_output_flatten")(pool)
    dense1 = layers.BatchNormalization(name="dense1_norm")(flatten)
    dense1 = layers.Dense(512, activation="relu", name="dense1")(dense1)
    dense1 = layers.Dropout(0.3, name="dense1_dropout")(dense1)
    dense2 = layers.BatchNormalization(name="dense2_norm")(dense1)
    dense2 = layers.Dense(256, activation="relu", name="dense2")(dense2)
    dense2 = layers.Dropout(0.2, name="dense2_dropout")(dense2)
    output = layers.Dense(256, name="dense_output")(dense2)

    embedding = Model(embedding_input, output, name="Embedding")

    # Freeze every ResNet layer up to (but not including) conv5_block1_out;
    # only the final ResNet stage and the new head remain trainable.
    trainable = False
    for layer in base_cnn.layers:
        if layer.name == "conv5_block1_out":
            trainable = True
        layer.trainable = trainable

    tower_1 = embedding(input_1)
    tower_2 = embedding(input_2)

    merge_layer = layers.Lambda(l1_distance, name="l1")([tower_1, tower_2])
    normal_layer = tf.keras.layers.BatchNormalization(name="l1_norm")(merge_layer)
    comparison_layer = layers.Dense(
        1,
        activation="sigmoid",
        name="final_layer",
    )(normal_layer)
    siamese = Model(inputs=[input_1, input_2], outputs=comparison_layer)

    if path is not None:
        siamese.load_weights(path)

    return siamese


def train(
    split_path: str,
    target_shape: Optional[tuple[int, int, int]] = (224, 224, 3),
    epochs: Optional[int] = 100,
    batch_size: Optional[int] = 16,
    margin: Optional[int] = 1,
) -> None:
    """
    Trains the siamese model with contrastive loss.

    Args:
        split_path: Path to the train/validation split file.
        target_shape: Image dimensions (height, width, channels).
        epochs: Maximum number of training epochs.
        batch_size: Batch size used for both generators.
        margin: Margin passed to the contrastive loss.
    """
    # Stop on loss plateau, log to TensorBoard, and checkpoint weights.
    # NOTE(review): save_freq=1 saves every batch, not every epoch —
    # confirm this is intended rather than save_freq="epoch".
    fit_callbacks = [
        tf.keras.callbacks.EarlyStopping(monitor="loss", patience=5),
        tf.keras.callbacks.TensorBoard(log_dir="/logs", histogram_freq=1),
        tf.keras.callbacks.ModelCheckpoint(
            filepath="/logs/weights{epoch:04d}.tf",
            save_weights_only=True,
            save_freq=1,
        ),
    ]

    # The number of training steps per epoch drives the cyclical LR cycle.
    train_gen = get_train_generator(
        split_path, batch_size=batch_size, input_size=target_shape
    )
    schedule = get_cyclical_lr(2 * len(train_gen))

    siamese = create_model(target_shape)
    siamese.compile(
        loss=loss(margin=margin),
        optimizer=Adam(schedule),
        metrics=[metrics.accuracy, metrics.precision, metrics.recall, metrics.f1],
    )
    siamese.summary()

    siamese.fit(
        train_gen,
        validation_data=get_valid_generator(
            split_path, batch_size=batch_size, input_size=target_shape
        ),
        epochs=epochs,
        callbacks=fit_callbacks,
        verbose=1,
    )


def save_model(path: str) -> None:
    """
    Rebuilds the siamese model from a weights checkpoint and exports it
    as a full HDF5 model next to the weights file.

    Args:
        path: Path to the weights checkpoint.
    """
    siamese = create_model(path=path)
    # Export alongside the weights, without optimizer state.
    export_path = os.path.join(os.path.dirname(path), "pairs_siamese.h5")
    siamese.save(export_path, include_optimizer=False)

可以看到,我使用检查点保存了我的模型。当我尝试加载它时,我得到:

Traceback (most recent call last):
  File "/code/main.py", line 136, in <module>
    args.func(args)  # call the default function
  File "/code/functions.py", line 60, in save_model
    save_model_pairs(weights_path)
  File "/code/model/train_pairs/model.py", line 200, in save_model
    siamese = create_model(path=path)
  File "/code/model/train_pairs/model.py", line 130, in create_model
    siamese.load_weights(path)
  File "/usr/local/lib/python3.9/dist-packages/keras/utils/traceback_utils.py", line 67, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "<__array_function__ internals>", line 5, in transpose
  File "/usr/local/lib/python3.9/dist-packages/numpy/core/fromnumeric.py", line 660, in transpose
    return _wrapfunc(a, 'transpose', axes)
  File "/usr/local/lib/python3.9/dist-packages/numpy/core/fromnumeric.py", line 57, in _wrapfunc
    return bound(*args, **kwds)
ValueError: axes don't match array

我可以找到很多关于这个问题的链接,但我无法为我的案例修复它。我尝试使用 tf-nightly,它应该可以修复嵌套模型,但它对我不起作用。我已经训练这个模型好几个星期了,所以我正在寻找一种使用它的方法,而不是丢失我所做的一切。如果不存在解决方案,设置 save_weights_only=False 会解决问题吗?

标签: python, tensorflow, keras

解决方案


推荐阅读