python - TensorFlow 2:如何使用预定义的稀疏权重结构创建自定义层/自定义梯度?
问题描述
我的目标是在 Tensorflow 2 中创建一个具有预定义、固定和稀疏权重结构的自定义层。出于内存原因,我们需要将权重矩阵保持为可训练的 SparseTensor 的形式。
到目前为止,我们的解决方案是将 SparseTensor 的权重/值表示为自定义层中的标准可训练变量(“self.w”),并使用一个带自定义梯度的函数(“matmul_dense_sparse”)来处理稀疏矩阵运算和反向传播。
我的问题是,如何使用自定义梯度在稀疏网络架构上进行反向传播?
这是一个玩具示例(TF 2.1.0;Eager Execution 即时执行模式):
import numpy as np
import tensorflow as tf  # every later statement refers to TensorFlow as `tf`

# Notebook-style display of the installed version; the example was run on 2.1.0.
tf.__version__
# 2.1.0
@tf.custom_gradient
def matmul_dense_sparse(dense, sparse_mat):
    """Compute ``dense @ sparse_mat`` with a hand-written gradient.

    The product is evaluated as ``(sparse_mat^T @ dense^T)^T`` so that the
    sparse operand is the first argument of ``tf.sparse.sparse_dense_matmul``.

    Args:
        dense: Dense tensor, assumed 2-D ``(batch, in_features)`` -- TODO
            confirm against callers.
        sparse_mat: ``tf.sparse.SparseTensor``, assumed 2-D
            ``(in_features, units)``. It must stay sparse inside this
            function: both its ``indices`` and the sparse matmul are used.

    Returns:
        A ``(res, grad_fn)`` pair as required by ``tf.custom_gradient``.
        ``grad_fn`` returns the gradient w.r.t. ``dense`` and a 1-D gradient
        with one entry per stored (non-zero) value of ``sparse_mat``.

    NOTE(review): the traceback recorded for this example (TF 2.1.0) shows
    ``tf.custom_gradient`` calling ``convert_to_tensor`` on every argument,
    which raises ``TypeError`` for a ``SparseTensor``. This function keeps its
    original signature, so that limitation still applies to callers.
    """
    # Row/column coordinates of the stored entries; needed for the value gradient.
    sparse_indices = sparse_mat.indices
    dense_t = tf.transpose(dense)
    sparse_t = tf.sparse.transpose(sparse_mat)
    res = tf.transpose(tf.sparse.sparse_dense_matmul(sparse_t, dense_t))

    def grad_fn(grad_res):
        """Back-propagate ``grad_res`` to ``dense`` and to the sparse values."""
        grad_res_t = tf.transpose(grad_res)
        # d(loss)/d(dense) = grad_res @ sparse_mat^T, again with the sparse
        # operand placed first and the result transposed back.
        grad_dense = tf.transpose(
            tf.sparse.sparse_dense_matmul(sparse_mat, grad_res_t))
        # For a stored entry at (i, j) the gradient is the sum over axis 0 of
        # dense[:, i] * grad_res[:, j]; gather the matching columns and reduce.
        dense_edge_starts = tf.gather(dense, sparse_indices[:, 0], axis=1)
        grad_res_edge_ends = tf.gather(grad_res, sparse_indices[:, 1], axis=1)
        grad_values = tf.reduce_sum(
            tf.multiply(dense_edge_starts, grad_res_edge_ends), axis=0)
        return grad_dense, grad_values

    return res, grad_fn
# custom layer with sparse weight architecture
class SparseLinear(tf.keras.layers.Layer):
    """Linear layer whose weight matrix has a fixed, predefined sparsity pattern.

    Only the values at the given ``indices`` are trainable; they are stored in
    the 1-D variable ``self.w`` (one entry per index row).

    Args:
        indices: Array-like of shape ``(nnz, 2)`` holding the (row, col)
            coordinates of the non-zero weights.
        units: Number of output columns of the weight matrix.
        **kwargs: Forwarded to ``tf.keras.layers.Layer`` (e.g. ``name``).
    """

    def __init__(self, indices, units=32, **kwargs):
        super().__init__(**kwargs)
        self.units = units
        # SparseTensor requires int64 indices; coercing here also accepts
        # plain lists and platform-dependent integer dtypes.
        self.indices = np.asarray(indices, dtype=np.int64)

    def build(self, input_shape):
        # self.w corresponds to the trainable "values" in the sparse tensor
        self.w = self.add_weight(shape=(self.indices.shape[0],),
                                 initializer='random_normal',
                                 trainable=True)
        # Number of rows of the weight matrix (last input dimension).
        self._in_features = int(input_shape[-1])

    @property
    def sparse_mat(self):
        """Sparse weight matrix built from the *current* value of ``self.w``.

        Rebuilt on every access: creating it once in ``build`` would capture a
        one-time read of the variable, so later updates to ``self.w`` would
        never reach the matrix used in ``call``.
        """
        return tf.sparse.reorder(tf.sparse.SparseTensor(
            indices=self.indices,
            values=self.w,
            dense_shape=[self._in_features, self.units]))

    def call(self, x):
        return matmul_dense_sparse(x, self.sparse_mat)
# (row, col) coordinates of the non-zero entries of the sparse weight matrix
sp_idxs = np.array([[0, 2], [1, 2], [8, 1], [9, 3], [10, 5]])

# Random toy data: 20 samples with 100 features and one target value each
train_x = np.random.rand(20, 100)
train_y = np.random.rand(20, 1)

# Functional Keras model: sparse layer followed by two linear Dense layers
n_features = train_x.shape[-1]
inputs = tf.keras.Input(name='snp_input', shape=(n_features,))
layers = SparseLinear(units=32, indices=sp_idxs)(inputs)
layers = tf.keras.layers.Dense(12, activation='linear')(layers)
layers = tf.keras.layers.Dense(1, activation='linear')(layers)
model = tf.keras.Model(name='model', inputs=inputs, outputs=layers)

# Train with RMSprop on mean squared error, tracking MAE and MSE
optimizer = tf.keras.optimizers.RMSprop(0.001)
model.compile(
    optimizer=optimizer,
    loss='mse',
    metrics=['mae', 'mse'],
)
history = model.fit(train_x, train_y)

# Snapshot of all layer weights after fitting
weights = model.get_weights()
以及相应的错误信息:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-11-34c364b12112> in <module>()
51 n_features = train_x.shape[1]
52 inputs = tf.keras.Input(shape=(n_features,), name='snp_input')
---> 53 layers = Linear(indices=sp_idxs, units=32)(inputs)
54 layers = tf.keras.layers.Dense(12, activation='linear')(layers)
~/.local/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/base_layer.py in __call__(self, inputs, *args, **kwargs)
771 not base_layer_utils.is_in_eager_or_tf_function()):
772 with auto_control_deps.AutomaticControlDependencies() as acd:
--> 773 outputs = call_fn(cast_inputs, *args, **kwargs)
774 # Wrap Tensors in `outputs` in `tf.identity` to avoid
775 # circular dependencies.
~/.local/lib/python3.6/site-packages/tensorflow_core/python/autograph/impl/api.py in wrapper(*args, **kwargs)
235 except Exception as e: # pylint:disable=broad-except
236 if hasattr(e, 'ag_error_metadata'):
--> 237 raise e.ag_error_metadata.to_exception(e)
238 else:
239 raise
TypeError: in converted code:
<ipython-input-11-34c364b12112>:37 call *
return matmul_dense_sparse(x, self.sparse_mat)
/home/kma52/.local/lib/python3.6/site-packages/tensorflow_core/python/ops/custom_gradient.py:256 __call__
return self._d(self._f, a, k)
/home/kma52/.local/lib/python3.6/site-packages/tensorflow_core/python/ops/custom_gradient.py:212 decorated
return _graph_mode_decorator(wrapped, args, kwargs)
/home/kma52/.local/lib/python3.6/site-packages/tensorflow_core/python/ops/custom_gradient.py:307 _graph_mode_decorator
args = [ops.convert_to_tensor(x) for x in args]
/home/kma52/.local/lib/python3.6/site-packages/tensorflow_core/python/ops/custom_gradient.py:307 <listcomp>
args = [ops.convert_to_tensor(x) for x in args]
/home/kma52/.local/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py:1314 convert_to_tensor
ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
/home/kma52/.local/lib/python3.6/site-packages/tensorflow_core/python/framework/constant_op.py:317 _constant_tensor_conversion_function
return constant(v, dtype=dtype, name=name)
/home/kma52/.local/lib/python3.6/site-packages/tensorflow_core/python/framework/constant_op.py:258 constant
allow_broadcast=True)
/home/kma52/.local/lib/python3.6/site-packages/tensorflow_core/python/framework/constant_op.py:296 _constant_impl
allow_broadcast=allow_broadcast))
/home/kma52/.local/lib/python3.6/site-packages/tensorflow_core/python/framework/tensor_util.py:547 make_tensor_proto
"supported type." % (type(values), values))
TypeError: Failed to convert object of type <class 'tensorflow.python.framework.sparse_tensor.SparseTensor'> to Tensor. Contents: SparseTensor(indices=tf.Tensor(
[[ 0 2]
[ 1 2]
[ 8 1]
[ 9 3]
[10 5]], shape=(5, 2), dtype=int64), values=tf.Tensor([ 0.03837506 -0.07365214 -0.02256368 -0.05631712 0.05937713], shape=(5,), dtype=float32), dense_shape=tf.Tensor([100 32], shape=(2,), dtype=int64)). Consider casting elements to a supported type.
解决方案
我已经在 tensorflow 1.13.1 中成功实现了这样一个层,请参见:
https://github.com/ArnovanHilten/GenNet/blob/master/utils/LocallyDirectedConnected.py
对于 TensorFlow 2:
https://github.com/ArnovanHilten/GenNet/blob/master/utils/LocallyDirectedConnected_tf2.py
推荐阅读
- spring-boot - 从项目文件夹加载 CSS、图像时出现问题
- javascript - 使用复选框交换 div 元素内内容的位置
- oracle - 如何在 Oracle 存储过程中打印异常名称?
- c++ - 包含 OpenCV 头文件的 C++ 程序的交叉编译失败
- python - 使用python将数据框上传到谷歌表格时出错?
- anaconda - 没有名为“tokenize”的模块
- javascript - highcharts 中图表的大小
- asp.net-core - 哪个 nuget 包包含 MediaTypeNames?
- jsp - JEE:如何停止 URL 映射?
- ruby-on-rails-5 - 如何在 Rails 管理员关联模型中进行自定义下拉