ValueError: No gradients provided for any variable: ['dense/kernel:0', 'dense/bias:0', 'dense_1/kernel:0', 'dense_1/bias:0']. Error in an actor-critic

Problem description

I am trying to build an actor-critic script, but I keep hitting this error. My environment runs in MATLAB and the actor-critic runs in Python. The error comes from the actor's gradients, which come back as None (NoneType). It happens when I change the actor's size (the actor here has 39 outputs).
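
For reference, this ValueError is what optimizer.apply_gradients() raises when every gradient returned by tape.gradient() is None, i.e. when the loss has no differentiable path back to the model's weights. A minimal toy sketch of one common way that situation arises (a throwaway model, not the script below):

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

inp = layers.Input(shape=(1,))
out_layer = layers.Dense(4)(inp)
toy_model = keras.Model(inputs=inp, outputs=out_layer)
opt = keras.optimizers.Adam()

with tf.GradientTape() as tape:
    out = toy_model(tf.constant([[1.0]]))
    detached = out.numpy()                       # leaves the computation graph
    loss = tf.reduce_sum(tf.constant(detached))  # loss no longer depends on toy_model
grads = tape.gradient(loss, toy_model.trainable_variables)
print(grads)  # [None, None]
opt.apply_gradients(zip(grads, toy_model.trainable_variables))  # ValueError: No gradients provided for any variable

My full script: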

import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from datetime import datetime
from keras import backend as BK
from temp_ext_maker import ext_temp_days_int
import matlab.engine
import random

eng = matlab.engine.start_matlab()

def c_sigmoid(x):
    # Custom activation: sigmoid rescaled to the range [0, 100]
    y = BK.sigmoid(x)
    scale = 100
    return y * scale

path = 'C:/Users/danie/OneDrive/Desktop/POLIMI/tesi/settimana12/'

date = datetime.now().strftime("%d_%m_%Y_%H_%M_%S")

file = open(path +"demo/aop1_demo_" + date + ".txt", "w")

# The networks take a single scalar input (the mean temperature error w.r.t. the setpoint)
states = np.array([1])
input_shape = np.shape(states)
num_hidden = 64
gamma = 0.99  # discount factor

#actor
input_a = layers.Input(shape=(input_shape))
hidden_a = layers.Dense(num_hidden, activation="tanh")(input_a)
action = layers.Dense(39, activation=c_sigmoid)(hidden_a)
model_a = keras.Model(inputs=input_a, outputs=action)

#critic
input_c = layers.Input(shape=(input_shape))
hidden_c = layers.Dense(num_hidden, activation="tanh")(input_c)
critic = layers.Dense(1)(hidden_c)
model_c = keras.Model(inputs=input_c, outputs=critic)


optimizer = keras.optimizers.Adam(learning_rate=0.01)
action_history = []
critic_value_history = []
rewards_history = []
running_reward = 0
episode_count = 0
eps = np.finfo(np.float32).eps.item()
huber_loss = keras.losses.Huber()

while True:  # Run until solved
    # Init: pick an external temperature profile and a feasible setpoint
    disturb = random.choice(ext_temp_days_int)
    flag = 1
    while flag:
        if max(disturb) >= 25:
            disturb = random.choice(ext_temp_days_int)
        else:
            setpoint = np.random.uniform(low=max(disturb), high=25)
            flag = 0
    # Random initial conditions (arrays so the later "+= ret[...]" updates are element-wise)
    state = np.array([random.uniform(0, setpoint - 10) for _ in range(18)])
    error = np.mean(state) - setpoint
    wall = np.array([random.uniform(0, 25) for _ in range(18)])
    people = np.array([random.uniform(0, 100) for _ in range(18)])
    tank = np.array([random.uniform(25, 45) for _ in range(8)])
    uta = np.array([random.uniform(15, 25) for _ in range(2)])
    co2 = np.array([random.uniform(5, 20) for _ in range(18)])
    sample_t = 5
    room_length = 1440  # minutes left in the simulated day
    episode_reward = 0

    with tf.GradientTape() as a_tape, tf.GradientTape() as c_tape:
        for timestep in range(1, 10000):

            ind = (1440 - room_length) // 5  # index of the current 5-minute slot

            error = tf.convert_to_tensor(error, dtype=tf.float32)
            error = tf.expand_dims(error, 0)

            # Actor forward pass; the output is converted to a NumPy array here
            action = model_a(error)
            action = tf.keras.backend.get_value(action)[0]

            mat_state = np.concatenate((state, wall, co2, uta, tank))
            mat_state = mat_state.tolist()

            # Action matching: discretize the first 19 outputs into 4 levels (0-3)
            for i in range(0, 19):
                if action[i] < 25:
                    action[i] = 0
                elif action[i] >= 25 and action[i] < 50:
                    action[i] = 1
                elif action[i] >= 50 and action[i] < 75:
                    action[i] = 2
                else:
                    action[i] = 3

            action[38] = action[38] / 100  # last output rescaled to [0, 1]
            action = np.insert(action, 0, setpoint)
            people = np.array(people)
            action = np.concatenate((people, action))
            action = np.insert(action, 0, disturb[ind])
            action = action.tolist()

            # Environment return from the MATLAB model, scaled by the sample time
            ret = np.asarray(eng.start(mat_state, action, sample_t)) * sample_t
            ret = np.reshape(ret, (64,))
            state += ret[0:18]
            error = np.mean(state) - setpoint
            wall += ret[18:36]
            co2 += ret[36:54]
            uta += ret[54:56]
            tank += ret[56:64]

            room_length -= 5

            reward = 1 / ((error / setpoint) ** 2 + 1)
            rewards_history.append(reward)
            episode_reward += reward

            error = tf.convert_to_tensor(error, dtype=tf.float32)
            error = tf.expand_dims(error, 0)

            critic_value = model_c(error)
            critic_value_history.append(critic_value[0, 0])

            if room_length <= 0:
                break
    
    file.write("episode end " + str(episode_count) + "\n")
    
    running_reward = 0.05 * episode_reward + (1 - 0.05) * running_reward
    
    returns = []
    discounted_sum = 0
    for r in rewards_history[::-1]:
        discounted_sum = r + gamma * discounted_sum
        returns.insert(0, discounted_sum)
        
    returns = np.array(returns)
    returns = (returns - np.mean(returns)) / (np.std(returns) + eps)
    returns = returns.tolist()
    
    # Pair each stored critic value with its discounted return
    history = zip(critic_value_history, returns)
    actor_losses = []
    critic_losses = []
    for value, ret in history:
        diff = ret - value                       # advantage estimate
        actor_losses.append(diff)                # actor loss term
        critic_losses.append(huber_loss(tf.expand_dims(value, 0), tf.expand_dims(ret, 0)))
        
    # Backpropagation
    loss_c_value = sum(critic_losses)
    loss_a_value = sum(actor_losses)
    grads_a = a_tape.gradient(loss_a_value, model_a.trainable_variables)
    grads_c = c_tape.gradient(loss_c_value, model_c.trainable_variables)
    optimizer.apply_gradients(zip(grads_a, model_a.trainable_variables))
    optimizer.apply_gradients(zip(grads_c, model_c.trainable_variables))

    # Clear the loss and reward history
    action_history.clear()
    critic_value_history.clear()
    rewards_history.clear()

    episode_count += 1
    print("run rew: " + str(running_reward) +
          " ep rew: " + str(episode_reward) +
          " episode: " + str(episode_count))

    if running_reward >= 220:  # Condition to consider the task solved
        print("solved!")
        model_json = model_a.to_json()
        with open(path + "saved_models/aop1_jmodel_" + date + ".json", "w") as json_file:
            json_file.write(model_json)
        # serialize weights to HDF5
        model_a.save_weights(path + "saved_models/aop1_weights_" + date + ".h5")
        break

Tags: python, tensorflow, anaconda

Solution
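
A likely cause, judging from the posted script: inside the GradientTape the actor's output is immediately converted to a NumPy array with tf.keras.backend.get_value(action), and the actor loss that is eventually differentiated (diff = ret - value) is built only from the critic's outputs and the rewards. Nothing reaching a_tape.gradient(loss_a_value, model_a.trainable_variables) depends on model_a, so every gradient comes back as None and apply_gradients raises the ValueError. Below is a sketch of one standard way to keep the actor connected to the tape, using a categorical policy head and a log-probability loss; the single 4-level head, the placeholder return and all names here are illustrative assumptions, not the poster's code, and the real actor with 39 outputs would need a head per output or a different parameterization.

import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

num_levels = 4                                   # illustrative: the 4 discrete levels used in the question
inputs = layers.Input(shape=(1,))
hidden = layers.Dense(64, activation="tanh")(inputs)
logits = layers.Dense(num_levels)(hidden)        # one policy head, for brevity
value = layers.Dense(1)(hidden)
model = keras.Model(inputs=inputs, outputs=[logits, value])

optimizer = keras.optimizers.Adam(learning_rate=0.01)
huber = keras.losses.Huber()

with tf.GradientTape() as tape:
    action_logits, critic_value = model(tf.constant([[0.5]]))
    probs = tf.nn.softmax(action_logits)
    # Sampling may leave the tape, but the log-probability must stay a tensor
    sampled = int(np.random.choice(num_levels, p=probs.numpy()[0]))
    log_prob = tf.math.log(probs[0, sampled])
    ret = tf.constant(1.0)                       # placeholder discounted return
    advantage = ret - critic_value[0, 0]
    actor_loss = -log_prob * tf.stop_gradient(advantage)
    critic_loss = huber(tf.expand_dims(ret, 0), tf.expand_dims(critic_value[0, 0], 0))
    loss = actor_loss + critic_loss

grads = tape.gradient(loss, model.trainable_variables)   # no None entries now
optimizer.apply_gradients(zip(grads, model.trainable_variables))

The same idea carries over to the two-model setup in the question: whatever goes into loss_a_value has to be computed from tensors produced by model_a inside a_tape, without passing through get_value() or NumPy along the way.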

