首页 > 解决方案 > 新版keras导致形状不匹配

问题描述

以下代码在 tensorflow 2.3.1 下运行良好,但在 2.4.0 下,在尝试训练逆模型(它是一个 icm 神经网络)时,obs_last 和 obs_now 之间出现形状不匹配。在之前版本的 tensorflow 中,逆模型需要一个非类型形状的 IE。(None,None,1) 但现在它需要一个特定形状的 IE。(None,12,1) 有谁知道循环网络的变化可能导致这种情况发生。错误:https ://hastebin.com/uyizanabay.yaml

from keras.layers import Input, Concatenate, GRU, Dense, Reshape
from keras.optimizers import Adam
from keras.backend import clear_session
from pathlib import Path
from subprocess import Popen, PIPE, STDOUT, TimeoutExpired
import numpy as np
import tensorflow as tf
from rl.agents import SARSAAgent
from rl.policy import BoltzmannQPolicy
import os

cmd = 'echo Hello World!'
env_reward = 0
length_penalty = .25
learning_reward = 10

hidden_layers = 4
layer_neurons = 128
learning_rate = 0.001
nb_actions = 96

tf.get_logger().setLevel('ERROR')

done = False
cmd_in = True
obs_last = None
initialize = True

while True:
    if cmd_in:
        proc = Popen(cmd, shell=True, stdin=PIPE, stdout=PIPE, stderr=STDOUT, close_fds=True)
        try:
            stdout = proc.communicate(timeout=1)[0].decode()
            exitcode = proc.returncode
        except TimeoutExpired:
            proc.kill()
            stdout = proc.communicate()[0].decode()
            exitcode = proc.returncode
        nnin = ''.join(char for char in stdout if char.isprintable())
        filename = Path('mem.txt')
        filename.touch(exist_ok=True)
        if not nnin:
            nnin = 'Done!'
            stdout = nnin
        if exitcode == 0:
            done = True
            with open('mem.txt', 'r+') as mem:
                for line in stdout.splitlines():
                    if line + '\n' not in mem:
                        mem.write(line + '\n')
                        env_reward += learning_reward
        cmd = ''
        print('\n')
        print(stdout)
        print('# ', end='', flush=True)
    else:
        nnin = cmd
        print(nnin[-1], end='', flush=True)
    idxs = np.frombuffer(nnin.encode(), dtype=np.uint8) - 32
    env = tf.one_hot(idxs, 95)
    shape = env.shape
    env_reward -= length_penalty


    def build_actor_model(shape, nb_actions):
        model = Sequential()
        model.add(Reshape(shape[1::], input_shape=shape))
        for layer in range(hidden_layers):
            model.add(GRU(layer_neurons, name='GRU' + str(layer), return_sequences=True))
        model.add(GRU(layer_neurons, name='GRU' + str(hidden_layers)))
        model.add(Dense(nb_actions, name='output', activation='softmax'))
        return model


    def build_main(shape, name_prefix='main.'):
        inputs = Input(shape=shape)
        x = inputs
        for layer in range(hidden_layers):
            x = GRU(layer_neurons, name=name_prefix + ('GRU' + str(layer)), return_sequences=True)(x)
        x = GRU(layer_neurons, name=name_prefix + ('GRU' + str(hidden_layers)))(x)
        model = Model(inputs, x, name=name_prefix + 'main')
        return model


    def build_inverse_model(obs1, obs2, nb_actions):
        x = Concatenate()([obs1.output, obs2.output])
        x = Dense(nb_actions, name='icm_i.output', activation='sigmoid')(x)
        i_model = Model([obs1.input, obs2.input], x, name='icm_inverse_model')
        return i_model


    def build_forward_model(obs1, nb_actions):
        act1 = Input(shape=nb_actions)
        x = Concatenate()([obs1.output, act1])
        output_shape = obs1.output_shape[1]
        x = Dense(output_shape, name='icm_f.output', activation='linear')(x)
        f_model = Model([obs1.input, act1], x, name='icm_forward_model')
        return f_model


    inv_weights_fname = '{}_inv_weights.h5f'.format("SMB")
    fwd_weights_fname = '{}_fwd_weights.h5f'.format("SMB")
    agent_weights_fname = '{}_agent_weights.h5f'.format("SMB")

    main = build_main(shape)
    main2 = build_main(shape, name_prefix='main2.')
    inverse_model = build_inverse_model(main, main2, nb_actions)
    inverse_model.compile(Adam(learning_rate), loss='mse', metrics=['mse'])
    forward_model = build_forward_model(main, nb_actions)
    forward_model.compile(Adam(learning_rate), loss='mse', metrics=['mse'])
    model = build_actor_model((1,) + shape, nb_actions)
    policy = BoltzmannQPolicy()
    agent = SARSAAgent(model=model, nb_actions=nb_actions, policy=policy)
    agent.compile(Adam(learning_rate), metrics=['mae'])
    agent.reset_states()

    if initialize:
        if os.path.isfile(inv_weights_fname):
            inverse_model.load_weights(inv_weights_fname)
        if os.path.isfile(fwd_weights_fname):
            forward_model.load_weights(fwd_weights_fname)
        if os.path.isfile(agent_weights_fname):
            agent.load_weights(agent_weights_fname)
        initialize = False
    agent.training = True
    
    obs_now = env
    if obs_last is None:
        obs_last = obs_now
    action = agent.forward(obs_now)
    icm_action = np.zeros(nb_actions)
    icm_action[action] = 1
    inv_loss = inverse_model.train_on_batch([np.expand_dims(obs_last, 0), np.expand_dims(obs_now, 0)],
                                            [np.expand_dims(icm_action, 0)])
    features_now = main.predict(np.expand_dims(obs_now, 0))
    fwd_loss = forward_model.train_on_batch([np.expand_dims(obs_last, 0), np.expand_dims(icm_action, 0)],
                                            [features_now])
    obs_last = obs_now
    r_intr = (fwd_loss[0] ** 0.5) / 100
    reward = r_intr + env_reward
    agent.backward(reward, done)
    clear_session()
    done = False

    enc_ascii = action + 32
    if enc_ascii != 127:
        cmd += chr(enc_ascii)
        cmd_in = False
        continue
    cmd_in = True
    inverse_model.save_weights(inv_weights_fname, overwrite=True)
    forward_model.save_weights(fwd_weights_fname, overwrite=True)
    agent.save_weights(agent_weights_fname, overwrite=True)

标签: tensorflowkerasneural-networkrecurrent-neural-networkkeras-layer

解决方案


联系了 tensorflow 开发团队,似乎是一个 bug。我找到了解决方法,但我不会实施它,除非它最终成为无法解决的情况。作为参考,解决方法是将输入填充到相同的长度,但这不是必需的


推荐阅读