首页 > 解决方案 > TensorFlow 2 自定义损失:“没有为任何变量提供梯度”错误

问题描述

我尝试使用自定义损失函数来训练我的模型。这是我的损失函数:

def softargmax(x, beta=1e10):
    """Differentiable approximation of argmax along the last axis.

    With a very large `beta` the softmax collapses onto the max entry, so the
    weighted sum of index values approximates argmax while remaining
    differentiable.

    FIX: the original wrapped the result in tf.cast(..., tf.int32). Casting to
    an integer dtype is non-differentiable and severs the gradient path, which
    is exactly what produces the "No gradients provided for any variable"
    error. Keep the result as a float tensor so gradients can flow.
    """
    x_range = tf.range(x.shape.as_list()[-1], dtype=x.dtype)
    return tf.reduce_sum(tf.nn.softmax(x * beta) * x_range, axis=-1)

def convert2(lang, tensor):
    """Decode a sequence of token ids into a string via `lang.index_word`.

    Ids 1 and 2 (special start/end-style tokens) and 0 (padding) are skipped;
    every other id is looked up in the vocabulary and concatenated.
    """
    pieces = []
    for token_id in tensor:
        if token_id in (1, 2):
            continue
        if token_id != 0:
            pieces.append(lang.index_word[token_id])
    return ''.join(pieces)

def getScalar(inp, tar):
    """Return 1 - mean Levenshtein ratio between decoded `inp` and `tar` batches.

    Runs eagerly (wrapped by tf.py_function in `loss` below): each id sequence
    is decoded to a string with the global `lang` vocabulary, then the per-pair
    edit-distance losses are averaged into one float32 scalar tensor.

    NOTE(review): this routes through .numpy() and Python strings, so
    TensorFlow cannot differentiate through it — the resulting scalar carries
    no gradient back to any model variable.
    """
    inpS, tarS, losses = [], [], []
    # Decode both batches to strings (non-differentiable step).
    for i in inp.numpy():
        inpS.append(convert2(lang, i))
    for i in tar.numpy():
        tarS.append(convert2(lang, i))    
    # Levenshtein.ratio is a similarity in [0, 1]; 1 - ratio is the loss.
    for i, t in zip(inpS, tarS):
        losses.append(1-Levenshtein.ratio(i, t))
        
    # Batch mean; assumes a non-empty batch (len(losses) > 0) — TODO confirm.
    return tf.convert_to_tensor(sum(losses)/len(losses), dtype=tf.float32)
def loss(inp, tar):
    """Wrap `getScalar` as a TensorFlow op via tf.py_function.

    NOTE(review): tf.py_function executes arbitrary Python; for a string /
    edit-distance computation like getScalar there is no gradient defined, so
    GradientTape yields None for every variable — this is the source of the
    "No gradients provided for any variable" error.
    """
    return tf.py_function(getScalar, [inp, tar], tf.float32)

我用我的模块训练

@tf.function
def train_step(inp, tar):
  """One GAN-style training step: update generator `gen` and discriminator `dis`.

  Returns (loss_g, loss_d).

  FIX: the decorator and the `def` were fused onto one line
  (`@tf.function  def train_step(...)`), which is a syntax error — the
  decorator must be on its own line.

  NOTE(review): `loss_g` comes from a tf.py_function-based loss, which has no
  gradient; `g_tape.gradient` will therefore return None for every generator
  variable ("No gradients provided for any variable"). The generator loss must
  be expressed in differentiable TF ops for this update to work.
  """
  # Teacher-forcing split: decoder input vs. shifted target.
  tar_inp = tar[:, :-1]
  tar_real = tar[:, 1:]

  enc_padding_mask, combined_mask, dec_padding_mask = create_masks(inp, tar_inp)

  with tf.GradientTape() as g_tape, tf.GradientTape() as d_tape:
    predictions, _ = gen(inp, tar_inp,
                         True,
                         enc_padding_mask,
                         combined_mask,
                         dec_padding_mask)
    # Soft argmax over the vocabulary axis (see softargmax above).
    predicted_id = softargmax(predictions)

    pred_score = dis(predicted_id)
    real_score = dis(tar_real)

    loss_g = loss(tar_real, predicted_id)
    loss_d = discriminator_loss(real_score, pred_score)

  g_gradients = g_tape.gradient(loss_g, gen.trainable_variables)
  d_gradients = d_tape.gradient(loss_d, dis.trainable_variables)
  g_optimizer.apply_gradients(zip(g_gradients, gen.trainable_variables))
  d_optimizer.apply_gradients(zip(d_gradients, dis.trainable_variables))

  return loss_g, loss_d

但在训练时出错

ValueError:没有为任何变量提供梯度(ValueError: in user code):

<ipython-input-161-be38d41d405a>:30 train_step  *
    g_optimizer.apply_gradients(zip(g_gradients, gen.trainable_variables))
C:\Users\islab\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\keras\optimizer_v2\optimizer_v2.py:513 apply_gradients  **
    grads_and_vars = _filter_grads(grads_and_vars)
C:\Users\islab\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\keras\optimizer_v2\optimizer_v2.py:1271 _filter_grads
    ([v.name for _, v in grads_and_vars],))

ValueError: No gradients provided for any variable: ['transformer_18/encoder_18/embedding_46/embeddings:0', 'transformer_18/encoder_18/encoder_layer_53/multi_head_attention_159/dense_886/kernel:0', 'transformer_18/encoder_18/encoder_layer_53/multi_head_attention_159/dense_886/bias:0', 'transformer_18/encoder_18/encoder_layer_53/multi_head_attention_159/dense_887/kernel:0', 'transformer_18/encoder_18/encoder_layer_53/multi_head_attention_159/dense_887/bias:0', 'transformer_18/encoder_18/encoder_layer_53/multi_head_attention_159/dense_888/kernel:0', 'transformer_18/encoder_18/encoder_layer_53/multi_head_attention_159/dense_888/bias:0', 'transformer_18/encoder_18/encoder_layer_53/multi_head_attention_159/dense_889/kernel:0', 'transformer_18/encoder_18/encoder_layer_53/multi_head_attention_159/dense_889/bias:0', 'dense_890/kernel:0', 'dense_890/bias:0', 'dense_891/kernel:0', 'dense_891/bias:0', 'transformer_18/encoder_18/encoder_layer_53/layer_normalization_265/gamma:0', 'transformer_18/encoder_18/encoder_layer_53/layer_normalization_265/beta:0', 'transformer_18/encoder_18/encoder_layer_53/layer_normalization_266/gamma:0', 'transformer_18/encoder_18/encoder_layer_53/layer_normalization_266/beta:0', 'transformer_18/encoder_18/encoder_layer_54/multi_head_attention_160/dense_892/kernel:0', 'transformer_18/encoder_18/encoder_layer_54/multi_head_attention_160/dense_892/bias:0', 'transformer_18/encoder_18/encoder_layer_54/multi_head_attention_160/dense_893/kernel:0', 'transformer_18/encoder_18/encoder_layer_54/multi_head_attention_160/dense_893/bias:0', 'transformer_18/encoder_18/encoder_layer_54/multi_head_attention_160/dense_894/kernel:0', 'transformer_18/encoder_18/encoder_layer_54/multi_head_attention_160/dense_894/bias:0', 'transformer_18/encoder_18/encoder_layer_54/multi_head_attention_160/dense_895/kernel:0', 'transformer_18/encoder_18/encoder_layer_54/multi_head_attention_160/dense_895/bias:0', 'dense_896/kernel:0', 'dense_896/bias:0', 'dense_897/kernel:0', 
'dense_897/bias:0', 'transformer_18/encoder_18/encoder_layer_54/layer_normalization_267/gamma:0', 'transformer_18/encoder_18/encoder_layer_54/layer_normalization_267/beta:0', 'transformer_18/encoder_18/encoder_layer_54/layer_normalization_268/gamma:0', 'transformer_18/encoder_18/encoder_layer_54/layer_normalization_268/beta:0', 'transformer_18/encoder_18/encoder_layer_55/multi_head_attention_161/dense_898/kernel:0', 'transformer_18/encoder_18/encoder_layer_55/multi_head_attention_161/dense_898/bias:0', 'transformer_18/encoder_18/encoder_layer_55/multi_head_attention_161/dense_899/kernel:0', 'transformer_18/encoder_18/encoder_layer_55/multi_head_attention_161/dense_899/bias:0', 'transformer_18/encoder_18/encoder_layer_55/multi_head_attention_161/dense_900/kernel:0', 'transformer_18/encoder_18/encoder_layer_55/multi_head_attention_161/dense_900/bias:0', 'transformer_18/encoder_18/encoder_layer_55/multi_head_attention_161/dense_901/kernel:0', 'transformer_18/encoder_18/encoder_layer_55/multi_head_attention_161/dense_901/bias:0', 'dense_902/kernel:0', 'dense_902/bias:0', 'dense_903/kernel:0', 'dense_903/bias:0', 'transformer_18/encoder_18/encoder_layer_55/layer_normalization_269/gamma:0', 'transformer_18/encoder_18/encoder_layer_55/layer_normalization_269/beta:0', 'transformer_18/encoder_18/encoder_layer_55/layer_normalization_270/gamma:0', 'transformer_18/encoder_18/encoder_layer_55/layer_normalization_270/beta:0', 'transformer_18/encoder_18/encoder_layer_56/multi_head_attention_162/dense_904/kernel:0', 'transformer_18/encoder_18/encoder_layer_56/multi_head_attention_162/dense_904/bias:0', 'transformer_18/encoder_18/encoder_layer_56/multi_head_attention_162/dense_905/kernel:0', 'transformer_18/encoder_18/encoder_layer_56/multi_head_attention_162/dense_905/bias:0', 'transformer_18/encoder_18/encoder_layer_56/multi_head_attention_162/dense_906/kernel:0', 'transformer_18/encoder_18/encoder_layer_56/multi_head_attention_162/dense_906/bias:0', 
'transformer_18/encoder_18/encoder_layer_56/multi_head_attention_162/dense_907/kernel:0', 'transformer_18/encoder_18/encoder_layer_56/multi_head_attention_162/dense_907/bias:0', 'dense_908/kernel:0', 'dense_908/bias:0', 'dense_909/kernel:0', 'dense_909/bias:0', 'transformer_18/encoder_18/encoder_layer_56/layer_normalization_271/gamma:0', 'transformer_18/encoder_18/encoder_layer_56/layer_normalization_271/beta:0', 'transformer_18/encoder_18/encoder_layer_56/layer_normalization_272/gamma:0', 'transformer_18/encoder_18/encoder_layer_56/layer_normalization_272/beta:0', 'transformer_18/encoder_18/encoder_layer_57/multi_head_attention_163/dense_910/kernel:0', 'transformer_18/encoder_18/encoder_layer_57/multi_head_attention_163/dense_910/bias:0', 'transformer_18/encoder_18/encoder_layer_57/multi_head_attention_163/dense_911/kernel:0', 'transformer_18/encoder_18/encoder_layer_57/multi_head_attention_163/dense_911/bias:0', 'transformer_18/encoder_18/encoder_layer_57/multi_head_attention_163/dense_912/kernel:0', 'transformer_18/encoder_18/encoder_layer_57/multi_head_attention_163/dense_912/bias:0', 'transformer_18/encoder_18/encoder_layer_57/multi_head_attention_163/dense_913/kernel:0', 'transformer_18/encoder_18/encoder_layer_57/multi_head_attention_163/dense_913/bias:0', 'dense_914/kernel:0', 'dense_914/bias:0', 'dense_915/kernel:0', 'dense_915/bias:0', 'transformer_18/encoder_18/encoder_layer_57/layer_normalization_273/gamma:0', 'transformer_18/encoder_18/encoder_layer_57/layer_normalization_273/beta:0', 'transformer_18/encoder_18/encoder_layer_57/layer_normalization_274/gamma:0', 'transformer_18/encoder_18/encoder_layer_57/layer_normalization_274/beta:0', 'transformer_18/encoder_18/encoder_layer_58/multi_head_attention_164/dense_916/kernel:0', 'transformer_18/encoder_18/encoder_layer_58/multi_head_attention_164/dense_916/bias:0', 'transformer_18/encoder_18/encoder_layer_58/multi_head_attention_164/dense_917/kernel:0', 
'transformer_18/encoder_18/encoder_layer_58/multi_head_attention_164/dense_917/bias:0', 'transformer_18/encoder_18/encoder_layer_58/multi_head_attention_164/dense_918/kernel:0', 'transformer_18/encoder_18/encoder_layer_58/multi_head_attention_164/dense_918/bias:0', 'transformer_18/encoder_18/encoder_layer_58/multi_head_attention_164/dense_919/kernel:0', 'transformer_18/encoder_18/encoder_layer_58/multi_head_attention_164/dense_919/bias:0', 'dense_920/kernel:0', 'dense_920/bias:0', 'dense_921/kernel:0', 'dense_921/bias:0', 'transformer_18/encoder_18/encoder_layer_58/layer_normalization_275/gamma:0', 'transformer_18/encoder_18/encoder_layer_58/layer_normalization_275/beta:0', 'transformer_18/encoder_18/encoder_layer_58/layer_normalization_276/gamma:0', 'transformer_18/encoder_18/encoder_layer_58/layer_normalization_276/beta:0', 'transformer_18/decoder_18/embedding_47/embeddings:0', 'transformer_18/decoder_18/decoder_layer_53/multi_head_attention_165/dense_922/kernel:0', 'transformer_18/decoder_18/decoder_layer_53/multi_head_attention_165/dense_922/bias:0', 'transformer_18/decoder_18/decoder_layer_53/multi_head_attention_165/dense_923/kernel:0', 'transformer_18/decoder_18/decoder_layer_53/multi_head_attention_165/dense_923/bias:0', 'transformer_18/decoder_18/decoder_layer_53/multi_head_attention_165/dense_924/kernel:0', 'transformer_18/decoder_18/decoder_layer_53/multi_head_attention_165/dense_924/bias:0', 'transformer_18/decoder_18/decoder_layer_53/multi_head_attention_165/dense_925/kernel:0', 'transformer_18/decoder_18/decoder_layer_53/multi_head_attention_165/dense_925/bias:0', 'transformer_18/decoder_18/decoder_layer_53/multi_head_attention_166/dense_926/kernel:0', 'transformer_18/decoder_18/decoder_layer_53/multi_head_attention_166/dense_926/bias:0', 'transformer_18/decoder_18/decoder_layer_53/multi_head_attention_166/dense_927/kernel:0', 'transformer_18/decoder_18/decoder_layer_53/multi_head_attention_166/dense_927/bias:0', 
'transformer_18/decoder_18/decoder_layer_53/multi_head_attention_166/dense_928/kernel:0', 'transformer_18/decoder_18/decoder_layer_53/multi_head_attention_166/dense_928/bias:0', 'transformer_18/decoder_18/decoder_layer_53/multi_head_attention_166/dense_929/kernel:0', 'transformer_18/decoder_18/decoder_layer_53/multi_head_attention_166/dense_929/bias:0', 'dense_930/kernel:0', 'dense_930/bias:0', 'dense_931/kernel:0', 'dense_931/bias:0', 'transformer_18/decoder_18/decoder_layer_53/layer_normalization_277/gamma:0', 'transformer_18/decoder_18/decoder_layer_53/layer_normalization_277/beta:0', 'transformer_18/decoder_18/decoder_layer_53/layer_normalization_278/gamma:0', 'transformer_18/decoder_18/decoder_layer_53/layer_normalization_278/beta:0', 'transformer_18/decoder_18/decoder_layer_53/layer_normalization_279/gamma:0', 'transformer_18/decoder_18/decoder_layer_53/layer_normalization_279/beta:0', 'transformer_18/decoder_18/decoder_layer_54/multi_head_attention_167/dense_932/kernel:0', 'transformer_18/decoder_18/decoder_layer_54/multi_head_attention_167/dense_932/bias:0', 'transformer_18/decoder_18/decoder_layer_54/multi_head_attention_167/dense_933/kernel:0', 'transformer_18/decoder_18/decoder_layer_54/multi_head_attention_167/dense_933/bias:0', 'transformer_18/decoder_18/decoder_layer_54/multi_head_attention_167/dense_934/kernel:0', 'transformer_18/decoder_18/decoder_layer_54/multi_head_attention_167/dense_934/bias:0', 'transformer_18/decoder_18/decoder_layer_54/multi_head_attention_167/dense_935/kernel:0', 'transformer_18/decoder_18/decoder_layer_54/multi_head_attention_167/dense_935/bias:0', 'transformer_18/decoder_18/decoder_layer_54/multi_head_attention_168/dense_936/kernel:0', 'transformer_18/decoder_18/decoder_layer_54/multi_head_attention_168/dense_936/bias:0', 'transformer_18/decoder_18/decoder_layer_54/multi_head_attention_168/dense_937/kernel:0', 'transformer_18/decoder_18/decoder_layer_54/multi_head_attention_168/dense_937/bias:0', 
'transformer_18/decoder_18/decoder_layer_54/multi_head_attention_168/dense_938/kernel:0', 'transformer_18/decoder_18/decoder_layer_54/multi_head_attention_168/dense_938/bias:0', 'transformer_18/decoder_18/decoder_layer_54/multi_head_attention_168/dense_939/kernel:0', 'transformer_18/decoder_18/decoder_layer_54/multi_head_attention_168/dense_939/bias:0', 'dense_940/kernel:0', 'dense_940/bias:0', 'dense_941/kernel:0', 'dense_941/bias:0', 'transformer_18/decoder_18/decoder_layer_54/layer_normalization_280/gamma:0', 'transformer_18/decoder_18/decoder_layer_54/layer_normalization_280/beta:0', 'transformer_18/decoder_18/decoder_layer_54/layer_normalization_281/gamma:0', 'transformer_18/decoder_18/decoder_layer_54/layer_normalization_281/beta:0', 'transformer_18/decoder_18/decoder_layer_54/layer_normalization_282/gamma:0', 'transformer_18/decoder_18/decoder_layer_54/layer_normalization_282/beta:0', 'transformer_18/decoder_18/decoder_layer_55/multi_head_attention_169/dense_942/kernel:0', 'transformer_18/decoder_18/decoder_layer_55/multi_head_attention_169/dense_942/bias:0', 'transformer_18/decoder_18/decoder_layer_55/multi_head_attention_169/dense_943/kernel:0', 'transformer_18/decoder_18/decoder_layer_55/multi_head_attention_169/dense_943/bias:0', 'transformer_18/decoder_18/decoder_layer_55/multi_head_attention_169/dense_944/kernel:0', 'transformer_18/decoder_18/decoder_layer_55/multi_head_attention_169/dense_944/bias:0', 'transformer_18/decoder_18/decoder_layer_55/multi_head_attention_169/dense_945/kernel:0', 'transformer_18/decoder_18/decoder_layer_55/multi_head_attention_169/dense_945/bias:0', 'transformer_18/decoder_18/decoder_layer_55/multi_head_attention_170/dense_946/kernel:0', 'transformer_18/decoder_18/decoder_layer_55/multi_head_attention_170/dense_946/bias:0', 'transformer_18/decoder_18/decoder_layer_55/multi_head_attention_170/dense_947/kernel:0', 'transformer_18/decoder_18/decoder_layer_55/multi_head_attention_170/dense_947/bias:0', 
'transformer_18/decoder_18/decoder_layer_55/multi_head_attention_170/dense_948/kernel:0', 'transformer_18/decoder_18/decoder_layer_55/multi_head_attention_170/dense_948/bias:0', 'transformer_18/decoder_18/decoder_layer_55/multi_head_attention_170/dense_949/kernel:0', 'transformer_18/decoder_18/decoder_layer_55/multi_head_attention_170/dense_949/bias:0', 'dense_950/kernel:0', 'dense_950/bias:0', 'dense_951/kernel:0', 'dense_951/bias:0', 'transformer_18/decoder_18/decoder_layer_55/layer_normalization_283/gamma:0', 'transformer_18/decoder_18/decoder_layer_55/layer_normalization_283/beta:0', 'transformer_18/decoder_18/decoder_layer_55/layer_normalization_284/gamma:0', 'transformer_18/decoder_18/decoder_layer_55/layer_normalization_284/beta:0', 'transformer_18/decoder_18/decoder_layer_55/layer_normalization_285/gamma:0', 'transformer_18/decoder_18/decoder_layer_55/layer_normalization_285/beta:0', 'transformer_18/decoder_18/decoder_layer_56/multi_head_attention_171/dense_952/kernel:0', 'transformer_18/decoder_18/decoder_layer_56/multi_head_attention_171/dense_952/bias:0', 'transformer_18/decoder_18/decoder_layer_56/multi_head_attention_171/dense_953/kernel:0', 'transformer_18/decoder_18/decoder_layer_56/multi_head_attention_171/dense_953/bias:0', 'transformer_18/decoder_18/decoder_layer_56/multi_head_attention_171/dense_954/kernel:0', 'transformer_18/decoder_18/decoder_layer_56/multi_head_attention_171/dense_954/bias:0', 'transformer_18/decoder_18/decoder_layer_56/multi_head_attention_171/dense_955/kernel:0', 'transformer_18/decoder_18/decoder_layer_56/multi_head_attention_171/dense_955/bias:0', 'transformer_18/decoder_18/decoder_layer_56/multi_head_attention_172/dense_956/kernel:0', 'transformer_18/decoder_18/decoder_layer_56/multi_head_attention_172/dense_956/bias:0', 'transformer_18/decoder_18/decoder_layer_56/multi_head_attention_172/dense_957/kernel:0', 'transformer_18/decoder_18/decoder_layer_56/multi_head_attention_172/dense_957/bias:0', 
'transformer_18/decoder_18/decoder_layer_56/multi_head_attention_172/dense_958/kernel:0', 'transformer_18/decoder_18/decoder_layer_56/multi_head_attention_172/dense_958/bias:0', 'transformer_18/decoder_18/decoder_layer_56/multi_head_attention_172/dense_959/kernel:0', 'transformer_18/decoder_18/decoder_layer_56/multi_head_attention_172/dense_959/bias:0', 'dense_960/kernel:0', 'dense_960/bias:0', 'dense_961/kernel:0', 'dense_961/bias:0', 'transformer_18/decoder_18/decoder_layer_56/layer_normalization_286/gamma:0', 'transformer_18/decoder_18/decoder_layer_56/layer_normalization_286/beta:0', 'transformer_18/decoder_18/decoder_layer_56/layer_normalization_287/gamma:0', 'transformer_18/decoder_18/decoder_layer_56/layer_normalization_287/beta:0', 'transformer_18/decoder_18/decoder_layer_56/layer_normalization_288/gamma:0', 'transformer_18/decoder_18/decoder_layer_56/layer_normalization_288/beta:0', 'transformer_18/decoder_18/decoder_layer_57/multi_head_attention_173/dense_962/kernel:0', 'transformer_18/decoder_18/decoder_layer_57/multi_head_attention_173/dense_962/bias:0', 'transformer_18/decoder_18/decoder_layer_57/multi_head_attention_173/dense_963/kernel:0', 'transformer_18/decoder_18/decoder_layer_57/multi_head_attention_173/dense_963/bias:0', 'transformer_18/decoder_18/decoder_layer_57/multi_head_attention_173/dense_964/kernel:0', 'transformer_18/decoder_18/decoder_layer_57/multi_head_attention_173/dense_964/bias:0', 'transformer_18/decoder_18/decoder_layer_57/multi_head_attention_173/dense_965/kernel:0', 'transformer_18/decoder_18/decoder_layer_57/multi_head_attention_173/dense_965/bias:0', 'transformer_18/decoder_18/decoder_layer_57/multi_head_attention_174/dense_966/kernel:0', 'transformer_18/decoder_18/decoder_layer_57/multi_head_attention_174/dense_966/bias:0', 'transformer_18/decoder_18/decoder_layer_57/multi_head_attention_174/dense_967/kernel:0', 'transformer_18/decoder_18/decoder_layer_57/multi_head_attention_174/dense_967/bias:0', 
'transformer_18/decoder_18/decoder_layer_57/multi_head_attention_174/dense_968/kernel:0', 'transformer_18/decoder_18/decoder_layer_57/multi_head_attention_174/dense_968/bias:0', 'transformer_18/decoder_18/decoder_layer_57/multi_head_attention_174/dense_969/kernel:0', 'transformer_18/decoder_18/decoder_layer_57/multi_head_attention_174/dense_969/bias:0', 'dense_970/kernel:0', 'dense_970/bias:0', 'dense_971/kernel:0', 'dense_971/bias:0', 'transformer_18/decoder_18/decoder_layer_57/layer_normalization_289/gamma:0', 'transformer_18/decoder_18/decoder_layer_57/layer_normalization_289/beta:0', 'transformer_18/decoder_18/decoder_layer_57/layer_normalization_290/gamma:0', 'transformer_18/decoder_18/decoder_layer_57/layer_normalization_290/beta:0', 'transformer_18/decoder_18/decoder_layer_57/layer_normalization_291/gamma:0', 'transformer_18/decoder_18/decoder_layer_57/layer_normalization_291/beta:0', 'transformer_18/decoder_18/decoder_layer_58/multi_head_attention_175/dense_972/kernel:0', 'transformer_18/decoder_18/decoder_layer_58/multi_head_attention_175/dense_972/bias:0', 'transformer_18/decoder_18/decoder_layer_58/multi_head_attention_175/dense_973/kernel:0', 'transformer_18/decoder_18/decoder_layer_58/multi_head_attention_175/dense_973/bias:0', 'transformer_18/decoder_18/decoder_layer_58/multi_head_attention_175/dense_974/kernel:0', 'transformer_18/decoder_18/decoder_layer_58/multi_head_attention_175/dense_974/bias:0', 'transformer_18/decoder_18/decoder_layer_58/multi_head_attention_175/dense_975/kernel:0', 'transformer_18/decoder_18/decoder_layer_58/multi_head_attention_175/dense_975/bias:0', 'transformer_18/decoder_18/decoder_layer_58/multi_head_attention_176/dense_976/kernel:0', 'transformer_18/decoder_18/decoder_layer_58/multi_head_attention_176/dense_976/bias:0', 'transformer_18/decoder_18/decoder_layer_58/multi_head_attention_176/dense_977/kernel:0', 'transformer_18/decoder_18/decoder_layer_58/multi_head_attention_176/dense_977/bias:0', 
'transformer_18/decoder_18/decoder_layer_58/multi_head_attention_176/dense_978/kernel:0', 'transformer_18/decoder_18/decoder_layer_58/multi_head_attention_176/dense_978/bias:0', 'transformer_18/decoder_18/decoder_layer_58/multi_head_attention_176/dense_979/kernel:0', 'transformer_18/decoder_18/decoder_layer_58/multi_head_attention_176/dense_979/bias:0', 'dense_980/kernel:0', 'dense_980/bias:0', 'dense_981/kernel:0', 'dense_981/bias:0', 'transformer_18/decoder_18/decoder_layer_58/layer_normalization_292/gamma:0', 'transformer_18/decoder_18/decoder_layer_58/layer_normalization_292/beta:0', 'transformer_18/decoder_18/decoder_layer_58/layer_normalization_293/gamma:0', 'transformer_18/decoder_18/decoder_layer_58/layer_normalization_293/beta:0', 'transformer_18/decoder_18/decoder_layer_58/layer_normalization_294/gamma:0', 'transformer_18/decoder_18/decoder_layer_58/layer_normalization_294/beta:0', 'transformer_18/dense_982/kernel:0', 'transformer_18/dense_982/bias:0'].

我不知道如何调试,我试图删除 .numpy(),但它没有用,大家可以帮帮我吗?谢谢。

现在我从 softargmax 中删除 tf.cast

def softargmax(x, beta=1e10):
    """Soft (differentiable) argmax over the last axis; returns float values."""
    num_classes = x.shape.as_list()[-1]
    index_values = tf.range(num_classes, dtype=x.dtype)
    weights = tf.nn.softmax(x * beta)
    return tf.reduce_sum(weights * index_values, axis=-1)

,但收到警告:

WARNING:tensorflow:调用 GradientTape.gradient 时,源张量的 dtype 必须是浮点型(例如 tf.float32),但得到的是 tf.int32

和错误

TypeError: Cannot convert 0.0 to EagerTensor of dtype int32
     [[node gradient_tape/EagerPyFunc (defined at <ipython-input-311-3c818a224e1b>:31) ]]
  (1) Invalid argument:  TypeError: Cannot convert 0.0 to EagerTensor of dtype int32

我对损失和梯度之间的 dtype 感到困惑

回溯:

InvalidArgumentError: 2 root error(s) found.
  (0) Invalid argument:  TypeError: Cannot convert 0.0 to EagerTensor of dtype int32
Traceback (most recent call last):

  File "C:\Users\islab\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\ops\script_ops.py", line 242, in __call__
    return func(device, token, args)

  File "C:\Users\islab\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\ops\script_ops.py", line 143, in __call__
    for (x, dtype) in zip(ret, self._out_dtypes)

  File "C:\Users\islab\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\ops\script_ops.py", line 143, in <listcomp>
    for (x, dtype) in zip(ret, self._out_dtypes)

  File "C:\Users\islab\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\ops\script_ops.py", line 119, in _convert
    return constant_op.constant(0.0, dtype=dtype)

  File "C:\Users\islab\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\framework\constant_op.py", line 264, in constant
    allow_broadcast=True)

  File "C:\Users\islab\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\framework\constant_op.py", line 275, in _constant_impl
    return _constant_eager_impl(ctx, value, dtype, shape, verify_shape)

  File "C:\Users\islab\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\framework\constant_op.py", line 300, in _constant_eager_impl
    t = convert_to_eager_tensor(value, ctx, dtype)

  File "C:\Users\islab\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\framework\constant_op.py", line 98, in convert_to_eager_tensor
    return ops.EagerTensor(value, ctx.device_name, dtype)

TypeError: Cannot convert 0.0 to EagerTensor of dtype int32


     [[node gradient_tape/EagerPyFunc (defined at <ipython-input-468-88e1bc3a3f24>:30) ]]
  (1) Invalid argument:  TypeError: Cannot convert 0.0 to EagerTensor of dtype int32
Traceback (most recent call last):

  File "C:\Users\islab\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\ops\script_ops.py", line 242, in __call__
    return func(device, token, args)

  File "C:\Users\islab\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\ops\script_ops.py", line 143, in __call__
    for (x, dtype) in zip(ret, self._out_dtypes)

  File "C:\Users\islab\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\ops\script_ops.py", line 143, in <listcomp>
    for (x, dtype) in zip(ret, self._out_dtypes)

  File "C:\Users\islab\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\ops\script_ops.py", line 119, in _convert
    return constant_op.constant(0.0, dtype=dtype)

  File "C:\Users\islab\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\framework\constant_op.py", line 264, in constant
    allow_broadcast=True)

  File "C:\Users\islab\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\framework\constant_op.py", line 275, in _constant_impl
    return _constant_eager_impl(ctx, value, dtype, shape, verify_shape)

  File "C:\Users\islab\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\framework\constant_op.py", line 300, in _constant_eager_impl
    t = convert_to_eager_tensor(value, ctx, dtype)

  File "C:\Users\islab\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\framework\constant_op.py", line 98, in convert_to_eager_tensor
    return ops.EagerTensor(value, ctx.device_name, dtype)

TypeError: Cannot convert 0.0 to EagerTensor of dtype int32


     [[node gradient_tape/EagerPyFunc (defined at <ipython-input-468-88e1bc3a3f24>:30) ]]
     [[GroupCrossDeviceControlEdges_0/Adam/Adam/Const/_85]]
0 successful operations.
0 derived errors ignored. [Op:__inference_train_step_1355962]

Errors may have originated from an input operation.
Input Source operations connected to node gradient_tape/EagerPyFunc:
 EagerPyFunc (defined at <ipython-input-459-043bdf351e86>:20)

Input Source operations connected to node gradient_tape/EagerPyFunc:
 EagerPyFunc (defined at <ipython-input-459-043bdf351e86>:20)

Function call stack:
train_step -> train_step

标签: numpytensorflowmachine-learningnlpgradient

解决方案


推荐阅读