python - ValueError: No gradients provided for any variable: ['dense/kernel:0', 'dense/bias:0', 'dense_1/kernel:0', 'dense_1/bias:0'] in an actor-critic script
Problem description
I am trying to build an actor-critic script, but I keep hitting this error. My environment runs in MATLAB and the actor-critic runs in Python. The error comes from the actor's gradients, which are all of type NoneType. It started happening when I changed the actor's dimensions (the actor here has 39 outputs).
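For context: tf.GradientTape returns None for a variable whenever the loss is not connected to it through TensorFlow operations, and converting a tensor to NumPy anywhere on the path is enough to cut that connection. A minimal, self-contained illustration of the same symptom, independent of the script below:

import tensorflow as tf

x = tf.Variable(1.0)
with tf.GradientTape() as tape:
    y = x * 2.0
    y_np = y.numpy()               # plain NumPy value: off the tape from here on
    loss = tf.constant(y_np) ** 2  # rebuilt from NumPy, so no path back to x
print(tape.gradient(loss, x))      # -> None; apply_gradients would then raise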
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from datetime import datetime
from tensorflow.keras import backend as BK  # use tf.keras's backend, not standalone keras
from temp_ext_maker import ext_temp_days_int
import matlab.engine
import random
eng = matlab.engine.start_matlab()
def c_sigmoid(x):
    # sigmoid scaled to the (0, 100) range expected by the action matching below
    y = BK.sigmoid(x)
    scale = 100
    return y * scale
path = 'C:/Users/danie/OneDrive/Desktop/POLIMI/tesi/settimana12/'
date = datetime.now().strftime("%d_%m_%Y_%H_%M_%S")
file = open(path +"demo/aop1_demo_" + date + ".txt", "w")
states = np.array([1])
input_shape = np.shape(states)
num_hidden = 64
gamma = 0.99
#actor
input_a = layers.Input(shape=input_shape)
hidden_a = layers.Dense(num_hidden, activation="tanh")(input_a)
action = layers.Dense(39, activation=c_sigmoid)(hidden_a)
model_a = keras.Model(inputs=input_a, outputs=action)
#critic
input_c = layers.Input(shape=input_shape)
hidden_c = layers.Dense(num_hidden, activation="tanh")(input_c)
critic = layers.Dense(1)(hidden_c)
model_c = keras.Model(inputs=input_c, outputs=critic)
optimizer = keras.optimizers.Adam(learning_rate=0.01)
action_history = []
critic_value_history = []
rewards_history = []
running_reward = 0
episode_count = 0
eps = np.finfo(np.float32).eps.item()
huber_loss = keras.losses.Huber()
while True:  # Run until solved
    # --- episode initialisation ---
    disturb = random.choice(ext_temp_days_int)
    flag = 1
    while flag:
        if max(disturb) >= 25:
            disturb = random.choice(ext_temp_days_int)
        else:
            setpoint = np.random.uniform(low=max(disturb), high=25)
            flag = 0
    # NumPy arrays so that the `+=` updates in the rollout are element-wise
    state = np.array([random.uniform(0, setpoint - 10) for _ in range(18)])
    error = np.mean(state) - setpoint
    wall = np.array([random.uniform(0, 25) for _ in range(18)])
    people = [random.uniform(0, 100) for _ in range(18)]
    tank = np.array([random.uniform(25, 45) for _ in range(8)])
    uta = np.array([random.uniform(15, 25) for _ in range(2)])
    co2 = np.array([random.uniform(5, 20) for _ in range(18)])
    sample_t = 5
    room_length = 1440  # minutes per episode, stepped in chunks of sample_t
    episode_reward = 0
    with tf.GradientTape() as a_tape, tf.GradientTape() as c_tape:
        for timestep in range(1, 10000):
            ind = (1440 - room_length) // 5
            error = tf.convert_to_tensor(error, dtype=tf.float32)
            error = tf.expand_dims(error, 0)
            action = model_a(error)
            # get_value() returns a NumPy array, so from here on `action`
            # is detached from a_tape and cannot carry gradients
            action = tf.keras.backend.get_value(action)[0]
            mat_state = np.concatenate((state, wall, co2, uta, tank))
            mat_state = mat_state.tolist()
            # action matching: quantise the first 19 outputs to 4 levels
            for i in range(0, 19):
                if action[i] < 25:
                    action[i] = 0
                elif 25 <= action[i] < 50:
                    action[i] = 1
                elif 50 <= action[i] < 75:
                    action[i] = 2
                else:
                    action[i] = 3
            action[38] = action[38] / 100
            action = np.insert(action, 0, setpoint)
            people = np.array(people)
            action = np.concatenate((people, action))
            action = np.insert(action, 0, disturb[ind])
            action = action.tolist()
            # environment step in MATLAB
            ret = np.asarray(eng.start(mat_state, action, sample_t)) * sample_t
            ret = np.reshape(ret, (64,))
            state += ret[0:18]
            error = np.mean(state) - setpoint
            wall += ret[18:36]
            co2 += ret[36:54]
            uta += ret[54:56]
            tank += ret[56:64]
            room_length -= 5
            reward = 1 / ((error / setpoint) ** 2 + 1)
            rewards_history.append(reward)
            episode_reward += reward
            error = tf.convert_to_tensor(error, dtype=tf.float32)
            error = tf.expand_dims(error, 0)
            critic_value = model_c(error)
            critic_value_history.append(critic_value[0, 0])
            if room_length <= 0:
                break
        file.write("episode end " + str(episode_count) + "\n")
        running_reward = 0.05 * episode_reward + (1 - 0.05) * running_reward
        # discounted, normalised returns
        returns = []
        discounted_sum = 0
        for r in rewards_history[::-1]:
            discounted_sum = r + gamma * discounted_sum
            returns.insert(0, discounted_sum)
        returns = np.array(returns)
        returns = (returns - np.mean(returns)) / (np.std(returns) + eps)
        returns = returns.tolist()
        history = zip(critic_value_history, returns)
        actor_losses = []
        critic_losses = []
        for value, ret in history:
            diff = ret - value
            # this "actor loss" depends only on the critic's output and the
            # returns, never on model_a, which is why grads_a comes back as None
            actor_losses.append(diff)
            critic_losses.append(
                huber_loss(tf.expand_dims(value, 0), tf.expand_dims(ret, 0)))
        # Backpropagation
        loss_c_value = sum(critic_losses)
        loss_a_value = sum(actor_losses)
        grads_a = a_tape.gradient(loss_a_value, model_a.trainable_variables)
        grads_c = c_tape.gradient(loss_c_value, model_c.trainable_variables)
        # grads_a is a list of Nones, so this line raises the ValueError
        optimizer.apply_gradients(zip(grads_a, model_a.trainable_variables))
        optimizer.apply_gradients(zip(grads_c, model_c.trainable_variables))
        # Clear the loss and reward history
        action_history.clear()
        critic_value_history.clear()
        rewards_history.clear()
    episode_count += 1
    print("run rew: " + str(float(running_reward)) +
          " ep rew: " + str(float(episode_reward)) +
          " episode: " + str(episode_count))
    if running_reward >= 220:  # Condition to consider the task solved
        print("solved!")
        model_json = model_a.to_json()
        with open(path + "saved_models/aop1_jmodel_" + date + ".json", "w") as json_file:
            json_file.write(model_json)
        # serialize weights to HDF5
        model_a.save_weights(path + "saved_models/aop1_weights_" + date + ".h5")
        break
Solution
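The ValueError is raised by the first optimizer.apply_gradients call: a_tape.gradient(loss_a_value, model_a.trainable_variables) returns a list of None, and the optimizer refuses a step in which no variable received a gradient. The variables named in the message ('dense/kernel:0', 'dense/bias:0', 'dense_1/kernel:0', 'dense_1/bias:0') are exactly the actor's two Dense layers.

The gradients are None because loss_a_value does not depend on the actor in any way TensorFlow can differentiate: tf.keras.backend.get_value(action) converts the actor's output to a NumPy array, so everything derived from action afterwards is off the tape; and actor_losses is built only from the returns and the critic's values (diff = ret - value), so even with an intact tape the actor's weights never enter the loss. The error is not really tied to the 39-output change; with this loss the actor's gradients are None for any output size.

The usual repair, following the Keras actor-critic CartPole example this script appears to be modelled on, is to make the actor stochastic: let it output logits, sample the discrete levels, send only the sampled integers to MATLAB, and keep the log-probabilities as tensors inside the tape so the actor loss is -log_prob * advantage. Below is a minimal sketch under that assumption; the sizes mirror the question (19 devices with 4 power levels each), but names such as num_devices, the 3-step rollout, and the constant advantage are illustrative only:

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Illustrative stand-ins for the question's setup: a 1-d state (the error)
# and 19 discrete devices with 4 power levels each.
num_hidden, num_devices, num_levels = 64, 19, 4

inp = layers.Input(shape=(1,))
hid = layers.Dense(num_hidden, activation="tanh")(inp)
out = layers.Dense(num_devices * num_levels)(hid)      # raw logits, no c_sigmoid
out = layers.Reshape((num_devices, num_levels))(out)
model_a = keras.Model(inputs=inp, outputs=out)
optimizer = keras.optimizers.Adam(learning_rate=0.01)

with tf.GradientTape() as a_tape:
    log_prob_history = []
    for _ in range(3):                                  # stand-in for the rollout
        error = tf.constant([[0.5]])
        logits = model_a(error)[0]                      # (19, 4) tensor, on the tape
        # sample one level per device; only these integers go to MATLAB
        levels = tf.random.categorical(logits, 1)[:, 0]
        log_probs = tf.nn.log_softmax(logits)           # differentiable
        log_prob_history.append(
            tf.reduce_sum(tf.gather(log_probs, levels, batch_dims=1)))
    # the advantage would be (return - critic_value); a constant stands in here
    advantages = tf.ones(len(log_prob_history))
    loss_a = -tf.reduce_sum(tf.stack(log_prob_history) * advantages)

grads_a = a_tape.gradient(loss_a, model_a.trainable_variables)
optimizer.apply_gradients(zip(grads_a, model_a.trainable_variables))  # no ValueError
print([g is not None for g in grads_a])                 # -> all True

With a loss of this shape, a_tape.gradient returns real tensors and apply_gradients no longer raises. The remaining continuous output (action[38] in the question) can be handled the same way with, for example, a Gaussian head whose log-density also stays on the tape.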