python - Tensorflow:多步衰减函数的输出返回 TypeError
问题描述
我们正在尝试使用此处建议的 tf.train.piecewise_constant() 在 Tensorflow 中编写多步衰减函数。此处的TensorFlow 文档指出:
“启用急切执行时,此函数返回一个函数,该函数又返回衰减的学习率张量”
但是,当我们尝试运行代码时,它返回了一个 TypeError。即使使用 lr() 也会返回相同的错误。
import tensorflow as tf
tf.enable_eager_execution()
import numpy as np
def conv3x3(out_planes, data_format ='channels_last', stride=1, padding='same', dilation=1, name = None,use_bias = False):
"""3x3 convolution with padding"""
return tf.keras.layers.Conv2D(filters = out_planes, kernel_size = 3,data_format= data_format,
strides=(stride, stride), padding='same', use_bias=use_bias,
dilation_rate = (dilation,dilation) , kernel_initializer=tf.initializers.he_normal(),name = name)
def conv1x1(out_planes,data_format ='channels_last', padding = 'same', stride=1):
"""1x1 convolution"""
return tf.keras.layers.Conv2D(filters = out_planes, kernel_size = 1, strides=(stride, stride),data_format= data_format,
padding=padding, use_bias=False, kernel_initializer=tf.initializers.he_normal())
class BasicBlock(tf.keras.Model):
expansion = 1
def __init__(self, planes=1, stride=1, data_format= 'channels_last', downsample=None, dilation=(1, 1), residual=True, key=None, stage = None):
super(BasicBlock, self).__init__()
self.data_format = data_format
bn_axis = 1 if self.data_format == 'channels_first' else 3
self.conv1 = conv3x3(out_planes= planes, stride = stride, padding='same' ,
data_format = self.data_format, dilation=dilation[0], name = '{}_{}_conv0'.format(key,stage))
self.bn1 = tf.keras.layers.BatchNormalization(axis=bn_axis, name = '{}_{}_BN0'.format(key,stage))
self.conv2 = conv3x3(out_planes =planes, padding='same',
data_format = self.data_format, dilation=dilation[0],name = '{}_{}_conv1'.format(key,stage))
self.bn2 = tf.keras.layers.BatchNormalization(axis=bn_axis,name = '{}_{}_BN1'.format(key,stage))
self.downsample = downsample
self.relu = tf.keras.layers.ReLU(name = '{}_{}_Relu'.format(key,stage))
self.stride = stride
self.residual = residual
def get_config(self):
base_config = {}
base_config['conv1'] = self.conv1.get_config()
base_config['bn1'] = self.bn1.get_config()
base_config['conv2'] = self.conv2.get_config()
base_config['bn2'] = self.bn2.get_config()
if self.downsample is not None:
base_config['downsample'] = self.downsample.get_config()
return base_config
def call(self, inputs, training=None):
residual = inputs
out = self.conv1(inputs)
out = self.bn1(out,training = training)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample is not None:
residual = self.downsample(inputs)
if self.residual:
out += residual
out = self.relu(out)
return out
class Bottleneck(tf.keras.Model):
expansion = 4
def __init__(self, planes, stride=1, data_format = 'channels_last',downsample=None,dilation=(1, 1)):
super(Bottleneck, self).__init__()
bn_axis = 1 if data_format == 'channels_first' else 3
self.conv1 = conv1x1(planes, data_format = data_format)
self.bn1 = tf.keras.layers.BatchNormalization(axis=bn_axis)
self.relu = tf.keras.layers.ReLU()
self.conv2 = conv3x3(planes, stride, padding= 'same', bias=False, data_format = data_format, dilation=dilation[1])
self.bn2 = tf.keras.layers.BatchNormalization(axis=bn_axis)
self.conv3 =conv1x1( planes * 4, data_format = data_format, )
self.bn3 = tf.keras.layers.BatchNormalization(axis=bn_axis) # nn.BatchNorm2d(planes * self.expansion)
self.downsample = downsample
self.stride = stride
def get_config(self):
base_config = {}
base_config['conv1'] = self.conv1.get_config()
base_config['bn1'] = self.bn1.get_config()
base_config['conv2'] = self.conv2.get_config()
base_config['bn2'] = self.bn2.get_config()
base_config['conv3'] = self.conv3.get_config()
base_config['bn3'] = self.bn3.get_config()
if self.downsample is not None:
base_config['downsample'] = self.downsample.get_config()
return base_config
def call(self, inputs, training=None):
identity = inputs
out = self.conv1(inputs)
out = self.bn1(out,training = training)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out,training = training)
out = tf.nn.relu(out)
out = self.conv3(out)
out = self.bn3(out,training = training)
if self.downsample is not None:
identity = self.downsample(inputs)
out += identity
out = self.relu(out)
return out
class pooling (tf.keras.Model):
def __init__(self, pool_size, stride = None, data_format='channels_last'):
super(pooling, self).__init__()
self.pool_size = pool_size
self.data_format = data_format
if stride is None:
self.stride =self.pool_size
else:
self.stride = stride
def call(self, inputs):
return tf.layers.average_pooling2d(inputs, strides =self.stride, pool_size = self.pool_size, data_format = self.data_format)
class DRN(tf.keras.Model):
def __init__(self, block, layers, data_format='channels_last', num_classes=7,channels=(16, 32, 64, 128, 256, 512, 512, 512),
out_map=False, out_middle=False, pool_size=28, arch='D'):
super(DRN, self).__init__()
self.inplanes = channels[0]
self.out_map = out_map
self.out_dim = channels[-1]
self.out_middle = out_middle
self.arch = arch
self.poolsize = pool_size
self.data_format = data_format
self.bn_axis = 1 if data_format == 'channels_first' else 3
self.conv0 = tf.keras.layers.Conv2D(filters=channels[0], kernel_size=7, strides=1, padding='same',
use_bias=False, data_format = self.data_format, kernel_initializer=tf.initializers.he_normal(), name ='L0_conv0' )
self.bn0 = tf.keras.layers.BatchNormalization(axis=self.bn_axis,name ='L0_BN0')
self.relu0 = tf.keras.layers.ReLU(name ='L0_Relu0')
if arch == 'C':
self.layer1 = self._make_layer(block = BasicBlock, planes = channels[0], blocks = layers[0], stride=1, data_format = self.data_format, key='CL1')
self.layer2 = self._make_layer(block = BasicBlock, planes = channels[1], blocks = layers[1], stride=2, data_format = self.data_format, key='CL2')
elif arch == 'D':
self.layer1 = self._make_conv_layers(channels = channels[0],convs = layers[0], stride=1, data_format = self.data_format, key='DL1')
self.layer2 = self._make_conv_layers(channels = channels[1],convs = layers[1], stride=2, data_format = self.data_format, key='DL2')
self.layer3 = self._make_layer(block = block, planes = channels[2], blocks = layers[2], stride=2, data_format = self.data_format, key='L3')
self.layer4 = self._make_layer(block = block, planes = channels[3], blocks = layers[3], stride=2, data_format = self.data_format, key='L4')
self.layer5 = self._make_layer(block = block, planes = channels[4], blocks = layers[4], dilation=2, new_level=False, data_format = self.data_format, key='L5')
self.layer6 = None if layers[5] == 0 else self._make_layer(block, channels[5], layers[5], dilation=4, new_level=False, data_format = self.data_format, key='L6')
if arch == 'C':
self.layer7 = None if layers[6] == 0 else self._make_layer(BasicBlock, channels[6], layers[6], dilation=2, new_level=False, residual=False, data_format = self.data_format, key='CL7')
self.layer8 = None if layers[7] == 0 else self._make_layer(BasicBlock, channels[7], layers[7], dilation=1, new_level=False, residual=False, data_format = self.data_format, key='CL8')
elif arch == 'D':
self.layer7 = None if layers[6] == 0 else self._make_conv_layers(channels[6], layers[6], dilation=2, data_format = self.data_format, key='DL7')
self.layer8 = None if layers[7] == 0 else self._make_conv_layers(channels[7], layers[7], dilation=1, data_format = self.data_format, key='DL8')
if num_classes > 0:
self.avgpool = tf.keras.layers.GlobalAveragePooling2D(data_format = self.data_format)
self.fc = tf.keras.layers.Dense(units=num_classes)
def _make_layer(self, block, planes, blocks, stride=1,dilation=1, new_level=True, data_format = 'channels_last', residual=True, key=None):
assert dilation == 1 or dilation % 2 == 0
downsample = None
if stride != 1 or self.inplanes != planes * block.expansion:
downsample = tf.keras.Sequential([conv1x1(out_planes = planes * block.expansion,stride = stride, data_format = data_format),
tf.keras.layers.BatchNormalization(axis=self.bn_axis)], name = 'downsample')
#
layers = []
layers.append(block(planes= planes, stride = stride, downsample = downsample, dilation=(1, 1) if dilation == 1 else (
dilation // 2 if new_level else dilation, dilation), data_format=data_format, residual=residual, key = key, stage = '0'))
self.inplanes = planes * block.expansion
for i in range(1, blocks):
layers.append(block(planes, residual=residual,dilation=(dilation, dilation), data_format=data_format, key = key, stage = i))
return tf.keras.Sequential(layers, name = key)
def _make_conv_layers(self, channels, convs, stride=1, dilation=1 ,data_format = 'channels_last', key = None):
modules = []
for i in range(convs):
modules.extend([
conv3x3(out_planes= channels, stride=stride if i == 0 else 1,
padding= 'same' , use_bias=False, dilation=dilation, data_format = data_format,name ='{}_{}_Conv'.format(key,i)),
tf.keras.layers.BatchNormalization(axis=self.bn_axis,name ='{}_{}_BN'.format(key,i)),
tf.keras.layers.ReLU(name ='{}_{}_Relu'.format(key,i))])
self.inplanes = channels
return tf.keras.Sequential(modules,name=key)
def call(self, x, training=None):
x = self.conv0(x)
x = self.bn0(x,training = training)
x = self.relu0(x)
x = self.layer1(x,training = training)
x = self.layer2(x,training = training)
x = self.layer3(x,training = training)
x = self.layer4(x,training = training)
x = self.layer5(x,training = training)
if self.layer6 is not None:
x = self.layer6(x,training = training)
if self.layer7 is not None:
x = self.layer7(x)
if self.layer8 is not None:
x = self.layer8(x)
if self.out_map:
x = self.fc(x)
else:
x = self.avgpool(x)
x = self.fc(x)
return x
def loss(logits, labels):
return tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels))
def make_scheduler(policy, init_lr, n_step_epoch, global_step):
total_steps= n_step_epoch * 10 #10 epochs
milestones = policy.split('_')
milestones.pop(0)
milestones = list(map(lambda x: int(x), milestones))
boundaries = np.multiply(milestones,n_step_epoch)
values = [init_lr] + [init_lr/(0.1**-i) for i in range(1,len(milestones)+1)]
learning_rate = tf.train.piecewise_constant(global_step, boundaries, values)
return learning_rate
def train(model, optimizer, step_counter ):
"""Trains model on `dataset` using `optimizer`."""
for (batch, i) in enumerate(range(10)):
print('Training Loop {}'.format(i))
images = tf.random.uniform((4, 224, 224,3))
labels = tf.constant(np.random.randint(4, size=4))
with tf.contrib.summary.record_summaries_every_n_global_steps(10, global_step=step_counter):
with tf.GradientTape() as tape:
logits = model(images, training=True)
loss_value = loss(logits, labels)
grads = tape.gradient(loss_value, model.variables)
optimizer.apply_gradients(zip(grads, model.variables), global_step=step_counter)
def test(model):
"""Perform an evaluation of `model` on the examples from `dataset`."""
for i in (range(10)):
images = tf.random.uniform((4, 225, 225,3))
logits = model(images, training=False)
print(logits)
def main():
model = DRN(BasicBlock, [1, 1, 2, 2, 2, 2, 1, 1], arch='C',num_classes = 4)
device = '/gpu:0'
step_counter = tf.train.get_or_create_global_step()
lr = make_scheduler(policy='multistep_2_5',init_lr=0.1,n_step_epoch = 10,global_step= step_counter)
optimizer = tf.train.MomentumOptimizer(lr,momentum=0.5)
with tf.device(device):
for _ in range(10):
train(model, optimizer,step_counter)
print(optimizer._lr_t)
test(model)
if __name__ == '__main__':
main()
文件“”,第 1 行,在 runfile('/home/srijith/work/Tensorflow/SkinCaner_tensorflow/debug/stackoverflow.py', wdir='/home/srijith/work/Tensorflow/SkinCaner_tensorflow/debug')
文件“/home/srijith/anaconda3/lib/python3.7/site-packages/spyder_kernels/customize/spydercustomize.py”,第 709 行,运行文件 execfile(文件名,命名空间)
文件“/home/srijith/anaconda3/lib/python3.7/site-packages/spyder_kernels/customize/spydercustomize.py”,第 108 行,在 execfile exec(compile(f.read(), filename, 'exec') 中,命名空间)
文件“/home/srijith/work/Tensorflow/SkinCaner_tensorflow/debug/stackoverflow.py”,第 311 行,在 main()
文件“/home/srijith/work/Tensorflow/SkinCaner_tensorflow/debug/stackoverflow.py”,第 305 行,在主列车中(模型、优化器、step_counter)
文件“/home/srijith/work/Tensorflow/SkinCaner_tensorflow/debug/stackoverflow.py”,第 284 行,在 train optimizer.apply_gradients(zip(grads, model.variables), global_step=step_counter)
文件“/home/srijith/anaconda3/lib/python3.7/site-packages/tensorflow/python/training/optimizer.py”,第 598 行,在 apply_gradients self._prepare()
_prepare learning_rate = learning_rate() 中的文件“/home/srijith/anaconda3/lib/python3.7/site-packages/tensorflow/python/training/momentum.py”,第 87 行
文件“/home/srijith/anaconda3/lib/python3.7/site-packages/tensorflow/python/training/learning_rate_decay_v2.py”,第 171 行,decayed_lr 边界 = ops.convert_n_to_tensor(boundaries)
文件“/home/srijith/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/ops.py”,第 1273 行,在 convert_n_to_tensor as_ref=False 中)
文件“/home/srijith/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/ops.py”,第 1228 行,internal_convert_n_to_tensor 引发 TypeError(“值必须是一个列表。”)
TypeError:值必须是一个列表。
当我们提供恒定的学习率时,代码会按预期工作。有什么我们缺少的吗?
解决方案
推荐阅读
- yii2 - Yii2 删除操作在点击链接时被调用两次
- parallel-processing - Julia:为什么共享内存多线程不给我加速?
- css - CSS 模块 - 排除类被转换
- google-apps-script - 当它们匹配子字符串时,如何将行移一格?
- slidetoggle - 使用 slidetoggel 时关闭其他 div
- reactjs - React Router : Router one component exact other non exact
- image - 显示损坏的静态路线图像(Google Maps Directions API)
- nativescript - 自定义组件无法访问默认的 NativeScript 指令
- nested - Lisp:添加列表列表的各个元素
- vue.js - PhpStorm Vuetify 自动完成