English to Hindi Translation LSTM

Problem Description

I am trying to implement English-to-Hindi translation with a deep learning LSTM. But when I train the model, it shows "nan" loss for both training and validation.

Link to the text file containing the translation pairs: http://www.manythings.org/anki/

Below is my Jupyter notebook code:

import string
import re
from numpy import array, argmax, random, take, delete
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, LSTM, Embedding, Bidirectional, RepeatVector, TimeDistributed
from keras.preprocessing.text import Tokenizer
from keras.callbacks import ModelCheckpoint
from keras.preprocessing.sequence import pad_sequences
from keras.models import load_model
from keras import optimizers
import matplotlib.pyplot as plt

# function to read raw text file
def read_text(filename):
    # open the file
    file = open(filename, mode='rt', encoding='utf-8')
    # read all text
    text = file.read()
    file.close()
    return text

# split a text into sentences
def to_lines(text):
    sents = text.strip().split('\n')
    sents = [i.split('\t') for i in sents]
    return sents

data = read_text("/content/drive/My Drive/Colab Notebooks/Language Translator New/hin.txt")
eng_hin = to_lines(data)
eng_hin = array(eng_hin)

# drop the third column (attribution text in the manythings.org file), keeping only the English-Hindi pairs
eng_hin = delete(eng_hin, 2, axis=1)

# Remove punctuation
eng_hin[:,0] = [s.translate(str.maketrans('', '', string.punctuation)) for s in eng_hin[:,0]]
eng_hin[:,1] = [s.translate(str.maketrans('', '', string.punctuation)) for s in eng_hin[:,1]]

# convert to lowercase
for i in range(len(eng_hin)):
    eng_hin[i,0] = eng_hin[i,0].lower()
    eng_hin[i,1] = eng_hin[i,1].lower()

# empty lists
eng_l = []
hin_l = []

# populate the lists with sentence lengths
for i in eng_hin[:,0]:
    eng_l.append(len(i.split()))

for i in eng_hin[:,1]:
    hin_l.append(len(i.split()))

print(max(eng_l))
print(max(hin_l))

# function to build a tokenizer
def tokenization(lines):
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(lines)
    return tokenizer

# prepare english tokenizer
eng_tokenizer = tokenization(eng_hin[:, 0])
eng_vocab_size = len(eng_tokenizer.word_index) + 1

eng_length = 25
print('English Vocabulary Size: %d' % eng_vocab_size)

# prepare Hindi tokenizer
hin_tokenizer = tokenization(eng_hin[:, 1])
hin_vocab_size = len(hin_tokenizer.word_index) + 1

hin_length = 25
print('Hindi Vocabulary Size: %d' % hin_vocab_size)

# encode and pad sequences
def encode_sequences(tokenizer, length, lines):
    # integer encode sequences
    seq = tokenizer.texts_to_sequences(lines)
    # pad sequences with 0 values
    seq = pad_sequences(seq, maxlen=length, padding='post')
    return seq

# Model Building
from sklearn.model_selection import train_test_split
train, test = train_test_split(eng_hin, test_size=0.2, random_state = 12)

# prepare training data
trainX = encode_sequences(eng_tokenizer, eng_length, train[:, 0])
trainY = encode_sequences(hin_tokenizer, hin_length, train[:, 1])

# prepare validation data
testX = encode_sequences(eng_tokenizer, eng_length, test[:, 0])
testY = encode_sequences(hin_tokenizer, hin_length, test[:, 1])

# build NMT model
def build_model(in_vocab, out_vocab, in_timesteps, out_timesteps, units):
    model = Sequential()
    model.add(Embedding(in_vocab, units, input_length=in_timesteps, mask_zero=True))
    model.add(LSTM(units))
    model.add(RepeatVector(out_timesteps))
    model.add(LSTM(units, return_sequences=True))
    model.add(Dense(out_vocab, activation='softmax'))
    return model

# build and compile the model (RMSprop + sparse categorical cross-entropy)
model = build_model(hin_vocab_size, eng_vocab_size, hin_length, eng_length, 512)
rms = optimizers.RMSprop(lr=0.001)
model.compile(optimizer=rms, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# checkpoint that keeps the model with the lowest validation loss
filename = '/content/drive/My Drive/Colab Notebooks/Language Translator New/Englis_Hindi_Checkpoints/model.h1.31_dec_19'
checkpoint = ModelCheckpoint(filename, monitor='val_loss', verbose=1, save_best_only=True, mode='min')

# train; targets are reshaped to (samples, timesteps, 1) as expected by sparse_categorical_crossentropy
history = model.fit(trainX, trainY.reshape(trainY.shape[0], trainY.shape[1], 1),
          epochs=100, batch_size=64,
          validation_split=0.2,
          callbacks=[checkpoint], verbose=1)

# save the final model
model.save('/content/drive/My Drive/Colab Notebooks/Language Translator New/Englis_Hindi_Checkpoints/eng2hin.h5')

When I try to fit the model, it runs but shows "nan" loss. Please help me resolve this.

Tags: python, tensorflow, keras, deep-learning, lstm

Solution


In short, this usually happens when the loss function/optimizer is not suited to what the network is computing. I recently built a calculator using this kind of network. Try loss='categorical_crossentropy' and optimizer='adam' and see if that works.
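For reference, a minimal sketch of what that suggestion could look like, reusing model, trainX, trainY, checkpoint and hin_vocab_size from the question above. The to_categorical conversion is my own addition, not part of the original answer: categorical_crossentropy expects one-hot targets, whereas sparse_categorical_crossentropy works with the integer targets the question already uses.

from keras.utils import to_categorical

# recompile with the loss/optimizer suggested above
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# categorical_crossentropy expects one-hot targets:
# (samples, timesteps) -> (samples, timesteps, num_classes)
# num_classes must match the size of the model's Dense output layer
trainY_onehot = to_categorical(trainY, num_classes=hin_vocab_size)

history = model.fit(trainX, trainY_onehot,
                    epochs=100, batch_size=64,
                    validation_split=0.2,
                    callbacks=[checkpoint], verbose=1)

One caveat: one-hot targets of shape (samples, timesteps, vocab_size) can use a lot of memory for a large vocabulary, so keeping sparse_categorical_crossentropy and only switching the optimizer to 'adam' is a lighter-weight variant of the same idea.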

