Tensorflow: output of a multi-step decay function returns TypeError

Problem description

We are trying to write a multi-step decay function in Tensorflow using tf.train.piecewise_constant(), as suggested here. The TensorFlow documentation here states:

"When eager execution is enabled, this function returns a function which in turn returns the decayed learning rate Tensor."

However, when we try to run the code, it raises a TypeError. Calling lr() directly returns the same error.
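For reference, here is a minimal sketch of the documented behavior (the boundaries and values below are made-up numbers for illustration): under eager execution, tf.train.piecewise_constant returns a callable which yields the current learning rate when invoked.

import tensorflow as tf
tf.enable_eager_execution()

# Minimal sketch of the documented eager behavior (illustrative numbers).
step = tf.train.get_or_create_global_step()
lr = tf.train.piecewise_constant(step, [10, 20], [1.0, 0.5, 0.1])
print(lr())  # a scalar learning-rate Tensor: 1.0 while step < 10

The full code we are running is below.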

import tensorflow as tf
tf.enable_eager_execution()
import numpy as np

def conv3x3(out_planes, data_format='channels_last', stride=1, padding='same', dilation=1, name=None, use_bias=False):
    """3x3 convolution with padding"""
    # Use the padding argument instead of hard-coding 'same'.
    return tf.keras.layers.Conv2D(filters=out_planes, kernel_size=3, data_format=data_format,
                                  strides=(stride, stride), padding=padding, use_bias=use_bias,
                                  dilation_rate=(dilation, dilation), kernel_initializer=tf.initializers.he_normal(), name=name)


def conv1x1(out_planes,data_format ='channels_last', padding = 'same', stride=1):
    """1x1 convolution"""
    return tf.keras.layers.Conv2D(filters = out_planes, kernel_size = 1, strides=(stride, stride),data_format= data_format,
                                  padding=padding, use_bias=False, kernel_initializer=tf.initializers.he_normal())

class BasicBlock(tf.keras.Model):
    expansion = 1

    def __init__(self, planes=1, stride=1, data_format= 'channels_last', downsample=None,  dilation=(1, 1), residual=True, key=None, stage = None):
        super(BasicBlock, self).__init__()
        self.data_format = data_format
        bn_axis = 1 if self.data_format == 'channels_first' else 3
        self.conv1 = conv3x3(out_planes= planes, stride = stride, padding='same' ,
                             data_format = self.data_format, dilation=dilation[0], name = '{}_{}_conv0'.format(key,stage))

        self.bn1 = tf.keras.layers.BatchNormalization(axis=bn_axis, name = '{}_{}_BN0'.format(key,stage))

        self.conv2 = conv3x3(out_planes =planes, padding='same',
                             data_format = self.data_format, dilation=dilation[0],name = '{}_{}_conv1'.format(key,stage))

        self.bn2 = tf.keras.layers.BatchNormalization(axis=bn_axis,name = '{}_{}_BN1'.format(key,stage))

        self.downsample = downsample
        self.relu = tf.keras.layers.ReLU(name = '{}_{}_Relu'.format(key,stage))
        self.stride = stride
        self.residual = residual

    def get_config(self):
        base_config = {}
        base_config['conv1'] = self.conv1.get_config()
        base_config['bn1'] = self.bn1.get_config()
        base_config['conv2'] = self.conv2.get_config()
        base_config['bn2'] = self.bn2.get_config()
        if self.downsample is not None:
            base_config['downsample'] = self.downsample.get_config()
        return base_config


    def call(self, inputs, training=None):
        residual = inputs
        out = self.conv1(inputs)
        out = self.bn1(out,training = training)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out, training=training)  # propagate the training flag to BatchNormalization

        if self.downsample is not None:
            residual = self.downsample(inputs)
        if self.residual:
            out += residual
        out = self.relu(out)
        return out


class Bottleneck(tf.keras.Model):
    expansion = 4

    def __init__(self, planes, stride=1, data_format = 'channels_last',downsample=None,dilation=(1, 1)):
        super(Bottleneck, self).__init__()

        bn_axis = 1 if data_format == 'channels_first' else 3
        self.conv1 = conv1x1(planes, data_format = data_format)
        self.bn1 = tf.keras.layers.BatchNormalization(axis=bn_axis)
        self.relu = tf.keras.layers.ReLU()
        # 'stride' must be passed by keyword (the second positional parameter of
        # conv3x3 is data_format), and the parameter is 'use_bias', not 'bias'.
        self.conv2 = conv3x3(planes, stride=stride, padding='same', use_bias=False, data_format=data_format, dilation=dilation[1])
        self.bn2 = tf.keras.layers.BatchNormalization(axis=bn_axis)
        self.conv3 = conv1x1(planes * 4, data_format=data_format)
        self.bn3 =  tf.keras.layers.BatchNormalization(axis=bn_axis) # nn.BatchNorm2d(planes * self.expansion)
        self.downsample = downsample
        self.stride = stride

    def get_config(self):
        base_config = {}
        base_config['conv1'] = self.conv1.get_config()
        base_config['bn1'] = self.bn1.get_config()
        base_config['conv2'] = self.conv2.get_config()
        base_config['bn2'] = self.bn2.get_config()
        base_config['conv3'] = self.conv3.get_config()
        base_config['bn3'] = self.bn3.get_config()
        if self.downsample is not None:
            base_config['downsample'] = self.downsample.get_config()
        return base_config



    def call(self, inputs, training=None):
        identity = inputs
        out = self.conv1(inputs)
        out = self.bn1(out,training = training)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out,training = training)
        out = tf.nn.relu(out)
        out = self.conv3(out)
        out = self.bn3(out,training = training)
        if self.downsample is not None:
            identity = self.downsample(inputs)
        out += identity
        out = self.relu(out)
        return out

class pooling(tf.keras.Model):
    def __init__(self, pool_size, stride = None, data_format='channels_last'):
        super(pooling, self).__init__()
        self.pool_size = pool_size
        self.data_format = data_format
        if stride is None:
            self.stride =self.pool_size
        else:
            self.stride = stride


    def call(self, inputs):
        return tf.layers.average_pooling2d(inputs, strides =self.stride, pool_size = self.pool_size, data_format = self.data_format)


class DRN(tf.keras.Model):
    def __init__(self, block, layers, data_format='channels_last', num_classes=7,channels=(16, 32, 64, 128, 256, 512, 512, 512),
                 out_map=False, out_middle=False, pool_size=28, arch='D'):
        super(DRN, self).__init__()
        self.inplanes = channels[0]
        self.out_map = out_map
        self.out_dim = channels[-1]
        self.out_middle = out_middle
        self.arch = arch
        self.poolsize = pool_size
        self.data_format = data_format
        self.bn_axis = 1 if data_format == 'channels_first' else 3

        self.conv0 = tf.keras.layers.Conv2D(filters=channels[0], kernel_size=7, strides=1,  padding='same',
                                               use_bias=False, data_format = self.data_format, kernel_initializer=tf.initializers.he_normal(), name ='L0_conv0' )
        self.bn0 = tf.keras.layers.BatchNormalization(axis=self.bn_axis,name ='L0_BN0')
        self.relu0 = tf.keras.layers.ReLU(name ='L0_Relu0')


        if arch == 'C':
            self.layer1 = self._make_layer(block = BasicBlock, planes = channels[0], blocks = layers[0], stride=1, data_format = self.data_format, key='CL1')
            self.layer2 = self._make_layer(block = BasicBlock, planes =  channels[1], blocks = layers[1], stride=2, data_format = self.data_format, key='CL2')
        elif arch == 'D':
            self.layer1 = self._make_conv_layers(channels = channels[0],convs = layers[0], stride=1, data_format = self.data_format, key='DL1')
            self.layer2 = self._make_conv_layers(channels = channels[1],convs = layers[1], stride=2, data_format = self.data_format, key='DL2')


        self.layer3 = self._make_layer(block = block, planes = channels[2], blocks = layers[2], stride=2, data_format = self.data_format, key='L3')
        self.layer4 = self._make_layer(block = block, planes = channels[3], blocks = layers[3], stride=2, data_format = self.data_format, key='L4')
        self.layer5 = self._make_layer(block = block, planes = channels[4], blocks = layers[4], dilation=2, new_level=False, data_format = self.data_format, key='L5')
        self.layer6 = None if layers[5] == 0 else self._make_layer(block, channels[5], layers[5], dilation=4, new_level=False, data_format = self.data_format, key='L6')

        if arch == 'C':
            self.layer7 = None if layers[6] == 0 else self._make_layer(BasicBlock, channels[6], layers[6], dilation=2, new_level=False, residual=False, data_format = self.data_format, key='CL7')
            self.layer8 = None if layers[7] == 0 else self._make_layer(BasicBlock, channels[7], layers[7], dilation=1, new_level=False, residual=False, data_format = self.data_format, key='CL8')
        elif arch == 'D':
            self.layer7 = None if layers[6] == 0 else self._make_conv_layers(channels[6], layers[6], dilation=2, data_format = self.data_format, key='DL7')
            self.layer8 = None if layers[7] == 0 else self._make_conv_layers(channels[7], layers[7], dilation=1, data_format = self.data_format, key='DL8')

        if num_classes > 0:
            self.avgpool = tf.keras.layers.GlobalAveragePooling2D(data_format = self.data_format)
            self.fc = tf.keras.layers.Dense(units=num_classes)


    def _make_layer(self, block, planes, blocks, stride=1,dilation=1, new_level=True, data_format = 'channels_last', residual=True, key=None):
        assert dilation == 1 or dilation % 2 == 0
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = tf.keras.Sequential([conv1x1(out_planes = planes * block.expansion,stride = stride, data_format = data_format),
                      tf.keras.layers.BatchNormalization(axis=self.bn_axis)], name = 'downsample')

        layers = []
        # Choose the dilation for the first block of this stage.
        dilation_pair = (1, 1) if dilation == 1 else (dilation // 2 if new_level else dilation, dilation)
        layers.append(block(planes=planes, stride=stride, downsample=downsample, dilation=dilation_pair,
                            data_format=data_format, residual=residual, key=key, stage='0'))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(planes, residual=residual,dilation=(dilation, dilation), data_format=data_format, key = key, stage = i))
        return tf.keras.Sequential(layers, name = key)


    def _make_conv_layers(self, channels, convs, stride=1, dilation=1 ,data_format = 'channels_last', key = None):
        modules = []
        for i in range(convs):
            modules.extend([
                conv3x3(out_planes= channels, stride=stride if i == 0 else 1,
                          padding= 'same' , use_bias=False, dilation=dilation,  data_format = data_format,name ='{}_{}_Conv'.format(key,i)),
                tf.keras.layers.BatchNormalization(axis=self.bn_axis,name ='{}_{}_BN'.format(key,i)),
                tf.keras.layers.ReLU(name ='{}_{}_Relu'.format(key,i))])
            self.inplanes = channels
        return tf.keras.Sequential(modules,name=key)


    def call(self, x, training=None):
        x = self.conv0(x)
        x = self.bn0(x,training = training)
        x = self.relu0(x)
        x = self.layer1(x,training = training)
        x = self.layer2(x,training = training)
        x = self.layer3(x,training = training)
        x = self.layer4(x,training = training)
        x = self.layer5(x,training = training)

        if self.layer6 is not None:
            x = self.layer6(x,training = training)

        if self.layer7 is not None:
            x = self.layer7(x, training=training)  # propagate training flag for the BN layers inside
        if self.layer8 is not None:
            x = self.layer8(x, training=training)
        if self.out_map:
            x = self.fc(x)
        else:
            x = self.avgpool(x)
            x = self.fc(x)
        return x

def loss(logits, labels):
  return tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels))

def make_scheduler(policy, init_lr, n_step_epoch, global_step):
    total_steps= n_step_epoch * 10 #10 epochs
    milestones = policy.split('_')
    milestones.pop(0)
    milestones = list(map(lambda x: int(x), milestones))
    boundaries = np.multiply(milestones,n_step_epoch)
    values = [init_lr] + [init_lr/(0.1**-i) for i in  range(1,len(milestones)+1)]
    learning_rate = tf.train.piecewise_constant(global_step, boundaries, values)
    return learning_rate


def train(model, optimizer, step_counter ):
  """Trains model on `dataset` using `optimizer`."""

  for (batch, i) in enumerate(range(10)):
      print('Training Loop {}'.format(i))
      images = tf.random.uniform((4, 224, 224,3))
      labels = tf.constant(np.random.randint(4, size=4))
      with tf.contrib.summary.record_summaries_every_n_global_steps(10, global_step=step_counter):
          with tf.GradientTape() as tape:
            logits = model(images, training=True)
            loss_value = loss(logits, labels)
          grads = tape.gradient(loss_value, model.variables)
          optimizer.apply_gradients(zip(grads, model.variables), global_step=step_counter)


def test(model):
  """Perform an evaluation of `model` on the examples from `dataset`."""
  for  i in (range(10)):
    images = tf.random.uniform((4, 225, 225,3))
    logits = model(images, training=False)
    print(logits)

def main():
    model =  DRN(BasicBlock, [1, 1, 2, 2, 2, 2, 1, 1], arch='C',num_classes = 4)
    device = '/gpu:0'
    step_counter = tf.train.get_or_create_global_step()
    lr = make_scheduler(policy='multistep_2_5',init_lr=0.1,n_step_epoch = 10,global_step= step_counter)
    optimizer = tf.train.MomentumOptimizer(lr,momentum=0.5)

    with tf.device(device):
        for _ in range(10):
           train(model, optimizer,step_counter)
           print(optimizer._lr_t)
           test(model)

if __name__ == '__main__':
  main()

Traceback (most recent call last):

  File "", line 1, in <module>
    runfile('/home/srijith/work/Tensorflow/SkinCaner_tensorflow/debug/stackoverflow.py', wdir='/home/srijith/work/Tensorflow/SkinCaner_tensorflow/debug')

  File "/home/srijith/anaconda3/lib/python3.7/site-packages/spyder_kernels/customize/spydercustomize.py", line 709, in runfile
    execfile(filename, namespace)

  File "/home/srijith/anaconda3/lib/python3.7/site-packages/spyder_kernels/customize/spydercustomize.py", line 108, in execfile
    exec(compile(f.read(), filename, 'exec'), namespace)

  File "/home/srijith/work/Tensorflow/SkinCaner_tensorflow/debug/stackoverflow.py", line 311, in <module>
    main()

  File "/home/srijith/work/Tensorflow/SkinCaner_tensorflow/debug/stackoverflow.py", line 305, in main
    train(model, optimizer, step_counter)

  File "/home/srijith/work/Tensorflow/SkinCaner_tensorflow/debug/stackoverflow.py", line 284, in train
    optimizer.apply_gradients(zip(grads, model.variables), global_step=step_counter)

  File "/home/srijith/anaconda3/lib/python3.7/site-packages/tensorflow/python/training/optimizer.py", line 598, in apply_gradients
    self._prepare()

  File "/home/srijith/anaconda3/lib/python3.7/site-packages/tensorflow/python/training/momentum.py", line 87, in _prepare
    learning_rate = learning_rate()

  File "/home/srijith/anaconda3/lib/python3.7/site-packages/tensorflow/python/training/learning_rate_decay_v2.py", line 171, in decayed_lr
    boundaries = ops.convert_n_to_tensor(boundaries)

  File "/home/srijith/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/ops.py", line 1273, in convert_n_to_tensor
    as_ref=False)

  File "/home/srijith/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/ops.py", line 1228, in internal_convert_n_to_tensor
    raise TypeError("values must be a list.")

TypeError: values must be a list.

The code works as expected when we supply a constant learning rate. Is there something we are missing?
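For comparison, this is a minimal sketch of the variant that works, where the optimizer is given a plain float instead of the scheduler:

optimizer = tf.train.MomentumOptimizer(0.1, momentum=0.5)  # constant learning rate: no TypeError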

Tags: python, tensorflow, optimization, deep-learning, piecewise

Solution
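
The traceback points at the boundaries argument rather than the schedule itself: internal_convert_n_to_tensor raises "values must be a list." because np.multiply(milestones, n_step_epoch) returns a NumPy ndarray, and the eager code path of tf.train.piecewise_constant accepts only a plain Python list of boundaries. A sketch of a fix, keeping the rest of the training code unchanged, is to build boundaries (and values) as ordinary Python lists:

def make_scheduler(policy, init_lr, n_step_epoch, global_step):
    # e.g. policy='multistep_2_5' -> milestones=[2, 5]
    milestones = [int(m) for m in policy.split('_')[1:]]
    # Plain Python lists: ops.convert_n_to_tensor rejects a NumPy ndarray here.
    boundaries = [m * n_step_epoch for m in milestones]
    # Decay by 10x at each milestone: [init_lr, init_lr/10, init_lr/100, ...]
    values = [init_lr * 0.1 ** i for i in range(len(milestones) + 1)]
    return tf.train.piecewise_constant(global_step, boundaries, values)

Equivalently, keeping np.multiply, converting its result with boundaries = np.multiply(milestones, n_step_epoch).tolist() should also satisfy the list check. With either change, the lr callable returned under eager execution can be passed to tf.train.MomentumOptimizer as before.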

