python - 如何在转置层之间绑定权重?
问题描述
我尝试用下面的代码在 TensorFlow 2.0 的 Keras 中实现权重绑定(tied weights),即让后面的 Dense 层使用前面 Dense 层权重的转置,但运行时出现了下面的错误。有谁知道应该如何编写绑定权重的 Dense 层?
# Question snippet: builds a Keras functional model and tries to initialise a
# 784-unit Dense layer from the transposed weights of the first Dense layer.
# NOTE(review): the snippet appears here with its indentation stripped, so
# which statements belong to the `with` block cannot be determined from this
# text alone.
tf.random.set_seed(0)
with tf.device('/cpu:0'):
# This returns a tensor
inputs = Input(shape=(784,))
# a layer instance is callable on a tensor, and returns a tensor
layer_1 = Dense(64, activation='relu')
layer_1_output = layer_1(inputs)
layer_2 = Dense(64, activation='relu')
layer_2_output = layer_2(layer_1_output)
# Transpose the first entry of layer_1.weights (presumably the kernel) and
# copy it out as a NumPy array.
# NOTE(review): `.numpy()` yields a snapshot of the values, not a shared
# variable, so later training of layer_1 would not be reflected - confirm
# whether a one-off copy is what is wanted.
weights = tf.transpose(layer_1.weights[0]).numpy()
print(weights.shape)
transpose_layer = Dense(
784, activation='relu')
transpose_layer_output = transpose_layer(layer_2_output)
# This is the call reported in the traceback: a single array is passed, which
# set_weights treats as a weight list of length 64, while the layer expects
# 2 weights.
transpose_layer.set_weights(weights)
# NOTE(review): the layer object `transpose_layer` is passed here rather than
# the tensor `transpose_layer_output` - confirm intent.
predictions = Dense(10, activation='softmax')(transpose_layer)
# This creates a model that includes
# the Input layer and four Dense layers
model = Model(inputs=inputs, outputs=predictions)
model.compile(optimizer=tf.keras.optimizers.Adam(0.001),
loss='categorical_crossentropy',
metrics=['accuracy'])
# print(model.weights)
model.summary()
错误
Traceback (most recent call last):
File "practice_2.py", line 62, in <module>
transpose_layer.set_weights(weights)
File "/Users/cheesiang_leow/.virtualenvs/tensorflow-2.0/lib/python3.6/site-
packages/tensorflow/python/keras/engine/base_layer.py", line 934, in set_weights
str(weights)[:50] + '...')
ValueError: You called `set_weights(weights)` on layer "dense_2" with a weight
list of length 64, but the layer was expecting 2 weights. Provided weights:
[[-0.03499636 0.0214913 0.04076344 ... -0.06531...
解决方案
我花了很多时间才弄清楚,但我认为这是通过继承 Keras Dense 层来实现 Tied Weights 的方式。
class TiedLayer(Dense):
    """Stacked autoencoder layer whose decoder reuses the encoder kernels.

    The encoder multiplies the input by one kernel per entry of
    ``layer_sizes``; the decoder walks the same kernels in reverse order,
    transposed, so the output has the same last dimension as the input.
    Only the biases are separate between the encoder (``biases``) and the
    decoder (``biases2``).

    The class subclasses ``Dense`` only to reuse its handling of the
    initializer / regularizer / constraint / ``use_bias`` / ``activation``
    arguments; ``Dense``'s own ``units``, kernel and bias are not used.

    NOTE(review): ``K`` is assumed to be an alias of the ``keras`` module
    (the code calls ``K.backend.*``) - confirm against the file's imports.
    """

    def __init__(self, layer_sizes, l2_normalize=False, dropout=0.0, *args, **kwargs):
        """Create the layer.

        Args:
            layer_sizes: sequence of encoder output widths, outermost first.
            l2_normalize: if true, the encoder output is L2-normalized along
                its last axis.
            dropout: value handed to ``K.backend.dropout`` before every
                matrix multiplication; values ``<= 0`` disable dropout.
            *args, **kwargs: forwarded to ``Dense.__init__``.  The special
                value ``activation="leaky_relu"`` selects a ``LeakyReLU()``
                layer as the activation.
        """
        # "leaky_relu" is resolved here instead of by Dense, so it must be
        # removed from kwargs before they are forwarded.  `.get` keeps the
        # constructor usable when no activation is given at all.
        use_leaky_relu = kwargs.get('activation') == "leaky_relu"
        if use_leaky_relu:
            kwargs.pop('activation')

        # The base constructor runs first: Dense.__init__ assigns
        # self.activation itself, so anything stored there beforehand would
        # be overwritten.  `units` is positional so that extra positional
        # arguments do not collide with it; its value is not used.
        super().__init__(1, *args, **kwargs)

        self.layer_sizes = layer_sizes
        self.l2_normalize = l2_normalize
        self.dropout = dropout
        self.kernels = []   # encoder kernels, reused transposed by the decoder
        self.biases = []    # encoder biases
        self.biases2 = []   # decoder biases
        self.uses_learning_phase = True
        # Kept apart from self.activation so that Dense.get_config() can
        # still serialize self.activation; takes precedence in _activate().
        self._leaky_relu = LeakyReLU() if use_leaky_relu else None

    def compute_output_shape(self, input_shape):
        """Return ``input_shape`` unchanged: the decoder restores the input width."""
        return input_shape

    def build(self, input_shape):
        """Create the shared kernels and the encoder/decoder biases.

        Raises:
            ValueError: if ``input_shape`` has fewer than two dimensions.
        """
        if len(input_shape) < 2:
            raise ValueError(
                'TiedLayer expects inputs with at least 2 dimensions, '
                'got input shape {}'.format(input_shape))
        input_dim = int(input_shape[-1])
        self.input_spec = InputSpec(min_ndim=2, axes={-1: input_dim})

        # Encoder: kernel i maps the previous width to layer_sizes[i].
        fan_in = input_dim
        for i, size in enumerate(self.layer_sizes):
            self.kernels.append(
                self.add_weight(
                    shape=(fan_in, size),
                    initializer=self.kernel_initializer,
                    name='ae_kernel_{}'.format(i),
                    regularizer=self.kernel_regularizer,
                    constraint=self.kernel_constraint))
            if self.use_bias:
                self.biases.append(
                    self.add_weight(
                        shape=(size,),
                        initializer=self.bias_initializer,
                        name='ae_bias_{}'.format(i),
                        regularizer=self.bias_regularizer,
                        constraint=self.bias_constraint))
            fan_in = size

        # Decoder biases: step n of decode() outputs the width that fed the
        # kernel it transposes, i.e. layer_sizes[:-1] reversed, then the
        # original input width.  Weight names are kept exactly as before
        # (including the index of the last one) so that previously saved
        # weights still match.
        if self.use_bias:
            for n, size in enumerate(reversed(self.layer_sizes[:-1])):
                self.biases2.append(
                    self.add_weight(
                        shape=(size,),
                        initializer=self.bias_initializer,
                        name='ae_bias2_{}'.format(n),
                        regularizer=self.bias_regularizer,
                        constraint=self.bias_constraint))
            self.biases2.append(
                self.add_weight(
                    shape=(input_dim,),
                    initializer=self.bias_initializer,
                    name='ae_bias2_{}'.format(len(self.layer_sizes)),
                    regularizer=self.bias_regularizer,
                    constraint=self.bias_constraint))
        self.built = True

    def call(self, inputs):
        """Encode ``inputs`` and decode the result back to the input width."""
        return self.decode(self.encode(inputs))

    def _apply_dropout(self, inputs):
        """Return dropped-out ``inputs`` in the training phase, ``inputs`` otherwise."""
        dropped = K.backend.dropout(inputs, self.dropout)
        return K.backend.in_train_phase(dropped, inputs)

    def _activate(self, tensor):
        """Apply the configured activation (LeakyReLU if requested) to ``tensor``."""
        if self._leaky_relu is not None:
            return self._leaky_relu(tensor)
        if self.activation is not None:
            return self.activation(tensor)
        return tensor

    def encode(self, inputs):
        """Run the encoder stack and return the latent tensor."""
        latent = inputs
        for i, kernel in enumerate(self.kernels):
            if self.dropout > 0:
                latent = self._apply_dropout(latent)
            latent = K.backend.dot(latent, kernel)
            if self.use_bias:
                latent = K.backend.bias_add(latent, self.biases[i])
            latent = self._activate(latent)
        if self.l2_normalize:
            # l2_normalize already returns the normalized tensor; dividing
            # the input by it would produce the norm instead.
            latent = K.backend.l2_normalize(latent, axis=-1)
        return latent

    def decode(self, latent):
        """Run the decoder: the encoder kernels in reverse order, transposed."""
        recon = latent
        for i, kernel in enumerate(reversed(self.kernels)):
            if self.dropout > 0:
                recon = self._apply_dropout(recon)
            recon = K.backend.dot(recon, K.backend.transpose(kernel))
            if self.use_bias:
                recon = K.backend.bias_add(recon, self.biases2[i])
            recon = self._activate(recon)
        return recon

    def get_config(self):
        """Return the constructor arguments needed to rebuild this layer."""
        config = super().get_config()
        # `units` is an implementation detail supplied by __init__ itself.
        config.pop('units', None)
        config.update({
            'layer_sizes': self.layer_sizes,
            'l2_normalize': self.l2_normalize,
            'dropout': self.dropout,
        })
        if self._leaky_relu is not None:
            # Restore the identifier __init__ understands.
            config['activation'] = 'leaky_relu'
        return config

    @classmethod
    def from_config(cls, config):
        """Rebuild a layer from the dictionary produced by ``get_config``."""
        return cls(**config)
希望它可以帮助别人。
推荐阅读
- json - 在 ngfor 循环中以角度显示 json 数据(来自 firebase 的数据)
- javascript - 在浏览器与 nodeJS 服务器中使用 SheetJS 生成 XLS 文件:优缺点
- spring-boot - Kafka 批处理监听器,轮询固定数量的记录(尽可能)
- c++ - C++ 语句重新排序
- graphql - 使用 Appsync 更新 GraphQL 中的突变
- r - 清洁数据和关联规则 - R
- ruby - 我如何处理 rest-client 500 错误响应并继续遍历我的循环?
- excel - 从 Excel 到 XML 的日期导出
- list - 根据过滤结果动态获取项目列表计数
- git - 无法推送到远程仓库