python - 英语到印地语翻译 LSTM
问题描述
我正在尝试使用深度学习 LSTM 实现英语到印地语的翻译。但是当我训练模型时,训练损失和验证损失都显示为“nan”。
包含翻译对的文本文件链接:http://www.manythings.org/anki/
下面是我的 Jupyter 笔记本代码:
import string
import re
from numpy import array, argmax, random, take, delete
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, LSTM, Embedding, Bidirectional, RepeatVector, TimeDistributed
from keras.preprocessing.text import Tokenizer
from keras.callbacks import ModelCheckpoint
from keras.preprocessing.sequence import pad_sequences
from keras.models import load_model
from keras import optimizers
import matplotlib.pyplot as plt
# function to read raw text file
def read_text(filename):
    """Return the entire contents of a UTF-8 text file as one string.

    Args:
        filename: Path of the file to read.

    Returns:
        The decoded text of the whole file.
    """
    # The context manager closes the handle even if read() raises.
    with open(filename, mode='rt', encoding='utf-8') as file:
        return file.read()
# split a text into sentences
def to_lines(text):
    """Split tab-separated text into one list of fields per line.

    Args:
        text: Raw text with one record per line and tab-separated fields.

    Returns:
        A list with one entry per non-empty line; each entry is the list of
        that line's tab-separated fields. Blank input yields an empty list.
    """
    # Empty lines are skipped so they do not turn into [''] rows, which would
    # make the rows ragged when the result is converted to an array.
    return [line.split('\t') for line in text.strip().split('\n') if line]
data = read_text("/content/drive/My Drive/Colab Notebooks/Language Translator New/hin.txt")
eng_hin = to_lines(data)
eng_hin = array(eng_hin)
# Drop column index 2 (presumably the attribution column of the Anki
# export -- confirm); columns 0 and 1 are used as English and Hindi below.
eng_hin = delete(eng_hin, 2, axis=1)

# Remove punctuation, then convert to lowercase.
# The translation table is loop-invariant, so it is built once instead of
# once per sentence.
# NOTE: string.punctuation is ASCII-only, so non-ASCII marks such as the
# Devanagari danda are left in place.
punctuation_table = str.maketrans('', '', string.punctuation)
eng_hin[:, 0] = [s.translate(punctuation_table).lower() for s in eng_hin[:, 0]]
eng_hin[:, 1] = [s.translate(punctuation_table).lower() for s in eng_hin[:, 1]]

# Sentence lengths, counted in whitespace-separated words.
eng_l = [len(s.split()) for s in eng_hin[:, 0]]
hin_l = [len(s.split()) for s in eng_hin[:, 1]]

# Longest English and Hindi sentence.
print(max(eng_l))
print(max(hin_l))
# function to build a tokenizer
def tokenization(lines):
    """Fit a fresh Keras ``Tokenizer`` on ``lines`` and return it.

    Args:
        lines: Iterable of sentences to build the vocabulary from.

    Returns:
        The fitted ``Tokenizer`` instance.
    """
    fitted = Tokenizer()
    fitted.fit_on_texts(lines)
    return fitted
# English side: tokenizer fitted on column 0, plus its vocabulary size and
# the fixed sequence length used when padding.
eng_tokenizer = tokenization(eng_hin[:, 0])
# One more than the number of known words, leaving room for index 0, which
# the padding step uses as its fill value.
eng_vocab_size = 1 + len(eng_tokenizer.word_index)
eng_length = 25
print('English Vocabulary Size: %d' % eng_vocab_size)

# Hindi side: same steps, fitted on column 1.
hin_tokenizer = tokenization(eng_hin[:, 1])
hin_vocab_size = 1 + len(hin_tokenizer.word_index)
hin_length = 25
print('Hindi Vocabulary Size: %d' % hin_vocab_size)
# encode and pad sequences
def encode_sequences(tokenizer, length, lines):
    """Turn sentences into fixed-length integer sequences.

    Args:
        tokenizer: Fitted tokenizer used to map words to integer ids.
        length: Number of positions in every returned sequence.
        lines: Iterable of sentences to encode.

    Returns:
        The output of ``pad_sequences``: each sentence's word ids, padded
        with 0 after the last word up to ``length``.
    """
    token_ids = tokenizer.texts_to_sequences(lines)
    return pad_sequences(token_ids, maxlen=length, padding='post')
# Model Building
from sklearn.model_selection import train_test_split

# Hold out 20% of the sentence pairs; the fixed seed keeps the split repeatable.
train, test = train_test_split(eng_hin, random_state=12, test_size=0.2)

# Training data: column 0 goes through the English tokenizer,
# column 1 through the Hindi tokenizer.
trainX = encode_sequences(tokenizer=eng_tokenizer, length=eng_length, lines=train[:, 0])
trainY = encode_sequences(tokenizer=hin_tokenizer, length=hin_length, lines=train[:, 1])

# Held-out data, encoded the same way.
testX = encode_sequences(tokenizer=eng_tokenizer, length=eng_length, lines=test[:, 0])
testY = encode_sequences(tokenizer=hin_tokenizer, length=hin_length, lines=test[:, 1])
# build NMT model
def build_model(in_vocab, out_vocab, in_timesteps, out_timesteps, units):
    """Assemble the sequence-to-sequence translation network.

    Args:
        in_vocab: Size of the input vocabulary (embedding input dimension).
        out_vocab: Size of the output vocabulary (width of the softmax layer).
        in_timesteps: Length of each input sequence.
        out_timesteps: Number of output positions produced per input.
        units: Embedding size and LSTM width.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    layers = [
        # Encoder: embed the input ids (0 is masked) and summarise the
        # sequence into a single vector.
        Embedding(in_vocab, units, input_length=in_timesteps, mask_zero=True),
        LSTM(units),
        # Decoder: repeat the summary once per output position and predict a
        # distribution over the output vocabulary at each one.
        RepeatVector(out_timesteps),
        LSTM(units, return_sequences=True),
        Dense(out_vocab, activation='softmax'),
    ]
    model = Sequential()
    for layer in layers:
        model.add(layer)
    return model
# The model reads English and predicts Hindi, matching trainX (encoded with
# eng_tokenizer / eng_length) and trainY (encoded with hin_tokenizer /
# hin_length). Passing the vocabularies the other way round makes the softmax
# layer eng_vocab_size wide while the labels are Hindi word ids; any id beyond
# that width is an invalid target for sparse_categorical_crossentropy, which
# can surface as a nan loss.
model = build_model(eng_vocab_size, hin_vocab_size, eng_length, hin_length, 512)
rms = optimizers.RMSprop(lr=0.001)
model.compile(optimizer=rms, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

filename = '/content/drive/My Drive/Colab Notebooks/Language Translator New/Englis_Hindi_Checkpoints/model.h1.31_dec_19'
# Keep only the weights with the lowest validation loss seen so far.
checkpoint = ModelCheckpoint(filename, monitor='val_loss', verbose=1, save_best_only=True, mode='min')

# The integer labels get a trailing axis of size 1, one label per output position.
history = model.fit(trainX, trainY.reshape(trainY.shape[0], trainY.shape[1], 1),
                    epochs=100, batch_size=64,
                    validation_split=0.2,
                    callbacks=[checkpoint], verbose=1)
model.save('/content/drive/My Drive/Colab Notebooks/Language Translator New/Englis_Hindi_Checkpoints/eng2hin.h5')
当我尝试拟合模型时,它可以运行,但损失显示为“nan”。请帮我解决这个问题。
解决方案
简而言之,这通常是因为损失函数/优化器与网络的计算不匹配。我最近使用这个网络创建了一个计算器。尝试使用 loss='categorical_crossentropy' 和 optimizer='adam',看看它是否有效。(注意:categorical_crossentropy 要求标签为 one-hot 编码;如果标签仍是整数索引,应继续使用 sparse_categorical_crossentropy。)
推荐阅读
- python - 'DataFrame' 对象没有属性 'DatetimeIndex'
- python - Str.split and time.sleep python when executed output is coming as the word+ none
- python - This API key is not authorized to use this service or API
- php - 在 WooCommerce 中获取产品评论计数的自定义功能
- json - A question about "JSON 2 Table using VueJS"
- r - 您能帮我使用 Rvest 进行网页抓取吗?
- javascript - 更新列表时更新模块即时更改
- amazon-web-services - 在 Squarespace 开发人员模式下,可以将 AWS CLI 开发工具包保存到网站的服务器端“主”目录吗?如果是这样,怎么做?
- pytorch - 如何在pytorch中实现部分跨步卷积层?
- vba - 在非连续范围内使用多个列表框