Python 3: How do I evaluate the Adam gradient in TensorFlow 2.0? I want to replace my own implementation

Problem description

I have the following code, which runs fine. However, I strongly suspect that the TensorFlow 2.0 implementation of the Adam gradient is more efficient than my naive one. How can I replace my evaluation of the Adam gradient with the TensorFlow 2.0 implementation?

import tensorflow as tf
import numpy as np
def linearModelGenerator(numberSamples):
    x = tf.random.normal(shape=(numberSamples,))
    y = 3*tf.ones(shape=(numberSamples,)) + tf.constant(5.0) * x +  tf.random.normal(shape=(numberSamples,),stddev=0.01)
    return x,y

class Adam:
    def __init__(self,shapes,lr=0.001,beta1=0.9,beta2=0.999,epsilon=1e-07):
        self.lr=lr
        self.beta1=beta1
        self.beta2=beta2
        self.epsilon=epsilon
        self.shapes=shapes
        self.m=np.shape(shapes)[0]
        self.listM=[]
        self.listV=[]
        self.t=0
        for i in range(self.m):
            if(np.isscalar(shapes[i])):
                self.listM.append(0)
                self.listV.append(0)
            else:
                self.listM.append(tf.zeros(shapes[i]))
                self.listV.append(tf.zeros(shapes[i]))

    def evalGradient(self,*args):
        adamGrad=[]
        self.t=self.t+1
        for i in range(self.m):
            grad=args[i]
            self.listM[i]=self.beta1*self.listM[i]+(1-self.beta1)*grad
            self.listV[i]=self.beta2*self.listV[i]+(1-self.beta2)*(grad*grad)
            hatM=self.listM[i]/(1-(self.beta1)**self.t)
            hatV=self.listV[i]/(1-(self.beta2)**self.t)
            adamGrad.append(hatM/(tf.math.sqrt(hatV)+(tf.ones(np.shape(hatV))*self.epsilon)))
        return adamGrad




class LinearModel:
    def __init__(self):
        self.weight = tf.Variable(-1.0)
        self.bias = tf.Variable(-1.0)
    def __call__(self, x):
        return self.weight * x + self.bias

def loss(y, pred):
    return tf.reduce_mean(tf.square(y - pred))


def trainAdam(linear_model,adam, x, y):
    with tf.GradientTape() as t:
        current_loss = loss(y, linear_model(x))
    gradWeight, gradBias = t.gradient(current_loss, [linear_model.weight, linear_model.bias])
    gradAdamList=adam.evalGradient(gradWeight,gradBias)
    gradAdamWeight=gradAdamList[0]
    gradAdamBias=gradAdamList[1]
    linear_model.weight.assign_sub(adam.lr * gradAdamWeight)
    linear_model.bias.assign_sub(adam.lr * gradAdamBias)        


if __name__=="__main__":
    numberSamples=100
    x,y=linearModelGenerator(numberSamples)
    linear_model = LinearModel()
    epochs = 1000
    shapes=[]
    shapes.append(1)
    shapes.append(1)
    adam=Adam(shapes,lr=0.1)
    for epoch_count in range(epochs):
         real_loss = loss(y, linear_model(x))
         trainAdam(linear_model,adam, x, y)

         print('w',linear_model.weight.numpy())

         print('bias',linear_model.bias.numpy())

         print('real_loss',real_loss.numpy())

I would like to keep the general structure of the code, but replace the Adam gradient implementation.

Tags: python-3.x, tensorflow2.0

Solution


The built-in optimizers in TensorFlow 2 can be used not only with tf.keras.Model.fit(), but also with tf.GradientTape(). With the latter, you can simply call the optimizer's apply_gradients() method directly; the optimizer object keeps track of the accumulators and running moments internally. Roughly speaking, your code can be modified as follows:

adam = tf.optimizers.Adam(learning_rate)

def trainAdam(linear_model,adam, x, y):
    with tf.GradientTape() as t:
        current_loss = loss(y, linear_model(x))
    gradWeight, gradBias = t.gradient(current_loss, [linear_model.weight, linear_model.bias])
    adam.apply_gradients(zip([gradWeight, gradBias], [linear_model.weight, linear_model.bias]))
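For completeness, here is a minimal end-to-end sketch of how the training loop might look with tf.optimizers.Adam dropped in, reusing the LinearModel, loss and data generator from the question. The learning rate of 0.1 mirrors the value used in the question and is an assumption, not a requirement:

import tensorflow as tf

class LinearModel:
    def __init__(self):
        self.weight = tf.Variable(-1.0)
        self.bias = tf.Variable(-1.0)
    def __call__(self, x):
        return self.weight * x + self.bias

def loss(y, pred):
    return tf.reduce_mean(tf.square(y - pred))

def linearModelGenerator(numberSamples):
    x = tf.random.normal(shape=(numberSamples,))
    y = 3*tf.ones(shape=(numberSamples,)) + 5.0*x + tf.random.normal(shape=(numberSamples,), stddev=0.01)
    return x, y

if __name__ == "__main__":
    x, y = linearModelGenerator(100)
    linear_model = LinearModel()
    adam = tf.optimizers.Adam(learning_rate=0.1)  # 0.1 matches the lr in the question (assumption)
    variables = [linear_model.weight, linear_model.bias]
    for epoch_count in range(1000):
        with tf.GradientTape() as t:
            current_loss = loss(y, linear_model(x))
        grads = t.gradient(current_loss, variables)
        # apply_gradients performs the bias-corrected Adam update and
        # advances the optimizer's internal step counter
        adam.apply_gradients(zip(grads, variables))
    print('w', linear_model.weight.numpy())
    print('bias', linear_model.bias.numpy())
    print('loss', loss(y, linear_model(x)).numpy())

Note that there is no longer any need to track t, m or v manually: the optimizer creates and updates those slot variables itself the first time apply_gradients() is called.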
