python - 如何重用 Keras 函数式模型的层
问题描述
import tensorflow as tf
import keras
def get_model():
    """Build a small dense autoencoder with the Keras functional API.

    The network maps a 10-feature input through Dense layers of 6 and 3
    units (ReLU) back to a 10-unit sigmoid output.

    Returns:
        An uncompiled ``keras.Model``.
    """
    # The functional API starts from a symbolic input tensor; a Dense layer
    # object by itself cannot be passed to another layer or used as a model input.
    inputs = keras.layers.Input(shape=(10,))
    x1 = keras.layers.Dense(6, activation='relu')(inputs)
    x2 = keras.layers.Dense(3, activation='relu')(x1)
    output_ = keras.layers.Dense(10, activation='sigmoid')(x2)
    model = keras.Model(inputs=[inputs], outputs=[output_])
    return model
# Build the model and train it while checkpointing the best weights.
# NOTE(review): the `...` arguments are presumably details elided by the asker.
model = get_model()
model.compile(...)
# Checkpoint callback: monitors val_loss in 'min' mode and, with
# save_best_only=True, writes ./best_model.h5 only for the best result so far.
chk_point = keras.callbacks.ModelCheckpoint(f'./best_model.h5',
monitor='val_loss', save_best_only=True, mode='min')
model.fit(..., callbacks=[chk_point])
def new_model():
    """Unfinished stub from the question: only records the saved model's path.

    Returns:
        None. No model is loaded or built here.
    """
    # NOTE(review): the checkpoint callback writes './best_model.h5' while this
    # path is '../best_model.h5' -- confirm the intended working directory.
    old = '../best_model.h5'  # using old model for training new model
(我正在尝试使用预训练的函数式模型进行迁移学习)
现在我想得到 best_model 的所有层。如果可能的话,我想删除我的 best_model 的最后一层。我想冻结 best_model 的所有层,即 trainable = False。并向该模型添加新层。
best_model.h5 是我在训练数据集上训练的降噪自动编码器,其输入和输出相同(例如 input_shape=(100,) 和 output_shape=(100,))。之后我打算冻结它的所有层,删除该模型的最后一层,再向该模型添加新层,然后像往常一样在 X 和 y 上训练该模型。
解决方案
一种方法是定义一个新模型,然后从旧模型中复制各层的权重(最后一层除外),并将这些层的 trainable 设置为 False。例如,假设您要删除最后一层并添加两个全连接(Dense)层(这只是一个示例)。请注意,当前模型的输入和输出大小均为 (10,)。另请注意,函数式 API(Functional API)模型的第一层是输入层(InputLayer)。代码如下:
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
import numpy as np
def get_model():
    """Build the baseline dense autoencoder (10 -> 6 -> 3 -> 10).

    Two ReLU Dense layers (6 and 3 units) are followed by a 10-unit
    sigmoid Dense layer, all wired with the functional API.

    Returns:
        An uncompiled ``keras.Model`` taking inputs of shape ``(10,)``.
    """
    net_input = keras.Input(shape=(10, ))
    hidden = net_input
    # Stack the ReLU layers in order, feeding each one's output to the next.
    for units in (6, 3):
        hidden = keras.layers.Dense(units, activation='relu')(hidden)
    reconstruction = keras.layers.Dense(10, activation='sigmoid')(hidden)
    return keras.Model(inputs=net_input, outputs=[reconstruction])
def get_new_model():
    """Build the extended network (10 -> 6 -> 3 -> 15 -> 10).

    The first two Dense layers have the same sizes and activations as the
    ones in ``get_model``; a 15-unit ReLU layer and a fresh 10-unit sigmoid
    output layer are added after them.

    Returns:
        An uncompiled ``keras.Model`` taking inputs of shape ``(10,)``.
    """
    net_input = keras.Input(shape=(10, ))
    stack = [
        keras.layers.Dense(6, activation='relu'),
        keras.layers.Dense(3, activation='relu'),
        # new layers
        keras.layers.Dense(15, activation='relu'),
        keras.layers.Dense(10, activation='sigmoid'),
    ]
    tensor = net_input
    for layer in stack:
        tensor = layer(tensor)
    return keras.Model(inputs=net_input, outputs=[tensor])
# Build and compile the baseline autoencoder (mean-squared-error reconstruction loss).
model = get_model()
model.compile(optimizer='adam', loss='mse')
batch_size = 16
# Run one forward pass on random data. The model is invoked through __call__
# (model(...)); Keras documents that call() should not be called directly.
_ = model(tf.random.normal(shape=(batch_size, 10)))
model.summary()
# Draw 100 samples (10 features each) from each of two normal distributions
# that differ only in their mean, then stack them into one (200, 10) array.
x = np.concatenate(
    [np.random.normal(loc=mean, size=(100, 10)) for mean in (0.0, 0.3)],
    axis=0,
)
# y marks the source distribution: zeros for the first 100 rows, ones for the
# rest. It is not used as a training target by the autoencoder.
y = np.zeros((200, 10))
y[100:] = 1.0
# Split into training and validation subsets; train_size=0.7 puts 70% of the
# rows in the training part.
splits = train_test_split(x, y, train_size=0.7)
x_train, x_val, y_train, y_val = splits
# Show the shapes of the training arrays.
for subset in (x_train, y_train):
    print(subset.shape)
# Checkpoint callback: monitors val_loss in 'min' mode and, with
# save_best_only=True, writes ./best_model.h5 only for the best result so far.
chk_point = keras.callbacks.ModelCheckpoint(
    './best_model.h5',
    monitor='val_loss',
    save_best_only=True,
    mode='min',
)
# Autoencoder training: the inputs are also the targets, for both the
# training data (x_train -> x_train) and the validation data (x_val -> x_val).
history = model.fit(
    x=x_train,
    y=x_train,
    batch_size=batch_size,
    epochs=100,
    callbacks=[chk_point],
    validation_data=(x_val, x_val),
)
# reload old model
model_old = keras.models.load_model('./best_model.h5')
model_old.summary()

# get new model and run one forward pass through __call__ (model_new(...));
# Keras documents that call() should not be called directly.
model_new = get_new_model()
_ = model_new(tf.random.normal(shape=(batch_size, 10)))
model_new.summary()

# copy the two dense layer weights and set trainable to False
# skip the first layer which is an InputLayer
# The layer objects are modified in place, so nothing has to be written back
# into model_new.layers.
for layer_old, layer_new in zip(model_old.layers[1:3], model_new.layers[1:3]):
    layer_new.trainable = False
    layer_new.set_weights(layer_old.get_weights())

# Compile only after freezing: changes to `trainable` are taken into account
# when compile() is called, so compiling earlier would not pick up the freeze.
model_new.compile(optimizer='adam', loss='mse')
model_new.summary()
推荐阅读
- visual-studio-code - 如何在 vscode 扩展中读取活动终端输出?
- c - 此程序中的输出屏幕为空
- php - Fullcalendar 显示可用用户而不是占用用户(反转逻辑)
- java - 将流映射到原始索引列表
- amazon-web-services - 如何获取 AWS CodeDeploy 日志并在 BitBucket 管道中显示它们
- git - 不要在 git grep 输出中显示 ^M(回车)
- angularjs - 如果文本长度为零,则显示 ui-select-no-choice 消息
- python-3.x - Dask 比原生 Python 还慢?我做错了什么?
- excel - 宏运行很快,直到运行不同的宏
- r - 如何根据R中的位置向量替换字符串向量中的字符?