python - 关于类或多行动画的 Python 代码?
问题描述
我是python的新手。我正在尝试的问题是关于优化。我想比较两种优化算法,即 RMSprop 和 Adam 与 Beale 函数。其实我是在网上下载了Adam算法,在原代码中加入了PMSprop。但是动画图告诉我,两种算法的粒子路径惊人地相同。(路径闪烁。)我确信它们应该不同。我尝试对 RMSprop 类进行一些严重更改,但结果没有改变。我不确定哪一步是错误的。动画步骤?还是叫课的步骤?
import sys
import matplotlib.pyplot as plt
import autograd.numpy as np
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.colors import LogNorm
from matplotlib import animation
from autograd import elementwise_grad,value_and_grad,grad
from scipy.optimize import minimize
from collections import defaultdict
from itertools import zip_longest
from functools import partial
f = lambda x,y: (1.5 - x + x*y)**2 + (2.25 - x + x*y**2)**2 + (2.625 - x
+x*y**3)**2
xmin, xmax, xstep = -4.5, 4.5, 0.2
ymin, ymax, ystep = -4.5, 4.5, 0.2
x, y = np.meshgrid(np.arange(xmin, xmax + xstep, xstep),np.arange(ymin, ymax
+ ystep, ystep))
z = f(x,y)
minima = np.array([3.0,0.5])
minima_ = minima.reshape(-1,1)
def target_func(weights):
x,y = weights
return f(x,y)
class Adam:
def __init__(self, loss, weights, lr=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8):
self.loss = loss
self.theta = weights
self.lr = lr # learning rate
self.beta1 = beta1
self.beta2 = beta2
self.epsilon = epsilon
self.get_gradient = grad(loss)
self.m = 0
self.v = 0
self.t = 0
def minimize_trace(self, path=[]):
self.t +=1
g = self.get_gradient(self.theta)
self.m = self.beta1 * self.m + (1 - self.beta1) * g
self.v = self.beta2 * self.v + (1 - self.beta2) * (g * g)
self.m_hat = self.m / (1 - self.beta1 ** self.t)
self.v_hat = self.v / (1 - self.beta2 ** self.t)
self.theta -= self.lr * self.m_hat / (self.v_hat ** 0.5 + self.epsilon)
path.append(np.copy(self.theta))
class RMSprop:
def __init__(self, loss, weights, lr=0.001, beta1=0.9, beta2=0.999,
epsilon=1e-8):
self.loss = loss
self.theta = weights
self.lr = lr # learning rate
self.beta1 = beta1
self.beta2 = beta2
self.epsilon = epsilon
self.get_gradient = grad(loss)
self.m = 0
self.v = 0
self.t = 0
def minimize_trace(self, path=[]):
self.t +=1
g = self.get_gradient(self.theta)
## self.m = self.beta1 * self.m + (1 - self.beta1) * g
self.v = self.beta2 * self.v + (1 - self.beta2) * (g * g)
self.m_hat = self.m / (1 - self.beta1 ** self.t)
self.v_hat = self.v / (1 - self.beta2 ** self.t)
self.theta -= self.lr * self.m_hat / (self.v_hat ** 0.5 +
self.epsilon)
path.append(np.copy(self.theta))
EPOCHS = 3000
SHOW_STEPS = 100
PRECISION = 1e-8
weights = np.array([1,1.5])
path_trace_adam = [np.copy(weights)]
path_trace_rmsprop = [np.copy(weights)]
adam = Adam(target_func, weights, lr = 0.01)
rmsprop = RMSprop(target_func, weights, lr = 0.01)
for i in range(EPOCHS):
adam.minimize_trace(path_trace_adam)
rmsprop.minimize_trace(path_trace_rmsprop)
print("\n final weights:{} loss:{}".format(adam.theta, adam.loss(adam.theta)))
print("\n final weights:{} loss:{}".format(rmsprop.theta, rmsprop.loss(rmsprop.theta)))
path_trace_adam = np.array(path_trace_adam).T
path_trace_rmsprop = np.array(path_trace_rmsprop).T
shape_adam = path_trace_adam.shape
shape_rmsprop = path_trace_rmsprop.shape
if shape_adam[1] > SHOW_STEPS:
show_step_adam = shape_adam[1] // SHOW_STEPS
path_trace_adam = np.array(path_trace_adam[:,::show_step_adam])
if shape_rmsprop[1] > SHOW_STEPS:
show_step_rmsprop = shape_rmsprop[1] // SHOW_STEPS
path_trace_rmsprop = np.array(path_trace_rmsprop[:,::show_step_rmsprop])
################## Visualize Convergence Trace
fig, ax = plt.subplots(figsize=(10,10))
ax.contour(x, y, z, levels=np.logspace(0, 5, 35), norm=LogNorm(), cmap=plt.cm.jet)
ax.plot(*minima_, 'r*', markersize=12)
line_adam, = ax.plot([], [], 'r', label='Adam Optimizer', lw=2)
line_rmsprop, = ax.plot([], [], 'k', label='RMSprop Optimizer', lw=2)
point_adam, = ax.plot([], [], 'ro')
point_rmsprop, = ax.plot([], [], 'ko')
ax.set_xlabel('$x$')
ax.set_ylabel('$y$')
ax.set_xlim((xmin, xmax))
ax.set_ylim((ymin, ymax))
ax.legend(loc='upper left')
################### animation
def init_adam():
line_adam.set_data([], [])
point_adam.set_data([], [])
return line_adam, point_adam
def init_rmsprop():
line_rmsprop.set_data([], [])
point_rmsprop.set_data([], [])
return line_rmsprop, point_rmsprop
def animate_adam(i):
line_adam.set_data(*path_trace_adam[::,:i])
point_adam.set_data(*path_trace_adam[::,i-1:i])
return line_adam, point_adam
def animate_rmsprop(i):
line_rmsprop.set_data(*path_trace_rmsprop[::,:i])
point_rmsprop.set_data(*path_trace_rmsprop[::,i-1:i])
return line_rmsprop, point_rmsprop
anim_adam = animation.FuncAnimation(fig, animate_adam, init_func=init_adam,
frames=path_trace_adam.shape[1], interval=60,
repeat_delay=None, repeat=True, blit=True)
anim_rmsprop = animation.FuncAnimation(fig, animate_rmsprop, init_func=init_rmsprop,
frames=path_trace_rmsprop.shape[1], interval=60,
repeat_delay=None, repeat=True, blit=True)
plt.show()
解决方案
错误在这里:
weights = np.array([1,1.5])
# ... truncated for brevity
adam = Adam(target_func, weights, lr = 0.01)
rmsprop = RMSprop(target_func, weights, lr = 0.01)
由于对的引用weights
在两个例程之间共享,因此每次运行时Adam.minimize_trace
,RMSprop.minimize_trace
它们都会修改同一个数组。由于路径是从数组派生的,因此两者上的路径变得相同。
如果在将数组传递给两个构造函数之前复制数组,它应该可以按预期工作。
adam = Adam(target_func, np.copy(weights), lr = 0.01)
rmsprop = RMSprop(target_func, np.copy(weights), lr = 0.01)
推荐阅读
- bash - 递归地将目录中的每个文件截断为 20 行
- reactjs - 如何通知 React 组件状态变化
- json - 如何使用 sed 用字符包围 JSON 值
- scala - 如何找到适合 32 位整数的 n 的最大倍数
- excel - 如何在每个数据行之间将数据行从 A 列复制到 B 列
- computational-geometry - 将 IMU 角速度准确转换为四元数
- vb.net - 不知何故,如果我的 VB.net 应用程序无法在组合框列表索引中运行,我就无法退出
- c# - OnCollisionEnter 在应该被调用的时候没有被调用
- sql - 如何将水晶报表代码中的If语句转换为SQL case语句
- c - 自定义 WM_APP 消息未发布在消息队列中