Keras character-level LSTM text classification not training

Problem description

I am making a Keras model for character-level text classification using an LSTM (my first model). The model is supposed to classify normal, spam, and rude messages from Twitch chat. However, the results I am getting are very disappointing and confusing.

The LSTM network learns very little, and no matter what I do the accuracy is terrible.

Here is my code:

import tensorflow as tf
''' import tensorflowjs.converters '''
from tensorflow import keras
from tensorflow.keras import layers
import json
import numpy as np
import re
import random
import sys

np.set_printoptions(threshold=sys.maxsize)

vocab = " qwertyuiopasdfghjklñzxcvbnmç1234567890?¿¡!,:-+/@#áéíóú\/"

dropout = 0.2

x_train = []
y_train = []
one_hot_encode = []
sentence = []

# maximum number of examples to keep per class
maxofeachtype = 1600

countnormal = 0
countspam = 0
countofensivo = 0

# Load dataset from data.json
with open("./data.json", 'r', encoding="utf8") as file:
    data = json.load(file)

# shuffle it
random.shuffle(data)

# create the vocabulary map
mapping = {character: index for index, character in enumerate(vocab)}

# balance the dataset by capping each class at maxofeachtype
for example in data:
    if(example["y"] == [1, 0, 0] and countnormal < maxofeachtype):
        countnormal += 1
    elif(example["y"] == [0, 1, 0] and countspam < maxofeachtype):
        countspam += 1
    elif(example["y"] == [0, 0, 1] and countofensivo < maxofeachtype):
        countofensivo += 1
    elif(countnormal == maxofeachtype or countspam == maxofeachtype or countofensivo == maxofeachtype):
        continue

    # strip characters outside the pattern below (note: this also removes
    # spaces, '-' and '+', even though they appear in vocab)
    cleanexample = re.sub(
        r'[^qwertyuiopasdfghjklñzxcvbnmç1234567890?¿¡!,:@#áéíóú\/]', '', str(example["x"]))

    # pad with spaces up to 500 characters (the maximum message length on Twitch)
    cleanexample = cleanexample.ljust(500)
    for character in cleanexample:
        sentence.append(mapping[character])

    # print(sentence)
    # one-hot encode the character ids into length-57 vectors
    x_train_ohe = tf.one_hot(sentence, depth=len(vocab)).numpy()
    # print(x_train_ohe)
    x_train.append(x_train_ohe)
    y_train.append(np.array(example["y"]))
    sentence = []

x_train = np.array(x_train)
y_train = np.array(y_train)
""" print(x_train[0][0:5], x_train[0][-5:], y_train[0]) """
print(x_train.shape[1], x_train.shape[2])
print(x_train.shape)
print(y_train.shape)

# Create the model
model = keras.Sequential()

model.add(layers.LSTM(256, activation="tanh",
                      return_sequences=True, dropout=dropout, input_shape=(500, 57)))

model.add(layers.LSTM(128, activation="tanh",
                      return_sequences=False, dropout=dropout))

model.add(layers.Dense(3, activation="softmax"))

optimizer = keras.optimizers.Adam(lr=0.01)

model.compile(optimizer=optimizer, loss="categorical_crossentropy",
              metrics=["accuracy"])

model.summary()

model.fit(x=x_train, y=y_train, epochs=15, shuffle=True,
          batch_size=25, validation_split=0.2)

model.save('model_py.h5')

''' tensorflowjs.converters.save_keras_model(model, "./modelo_js") '''

Before processing, the training examples look like this. [1, 0, 0] means normal, [0, 1, 0] rude language, and [0, 0, 1] spam:

"x": "sentence",
        "y": [
            1,
            0,
            0
        ]

After processing they look like this. I one-hot encode them into vectors of length 57, which is the vocabulary size. The arrays starting with a 1 are spaces:

[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0.]]
  ....
 [1 0 0]

After training with 1600 examples of each class and validation_split = 0.2, these are the results:

(4800, 500, 57)
(4800, 3)
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
lstm (LSTM)                  (None, 500, 256)          321536
_________________________________________________________________
lstm_1 (LSTM)                (None, 128)               197120
_________________________________________________________________
dense (Dense)                (None, 3)                 387
=================================================================
Total params: 519,043
Trainable params: 519,043
Non-trainable params: 0
_________________________________________________________________
Epoch 1/15
2020-09-09 12:35:47.606648: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library cublas64_10.dll
2020-09-09 12:35:47.872095: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library cudnn64_7.dll
154/154 [==============================] - 13s 87ms/step - loss: 1.0811 - accuracy: 0.4120 - val_loss: 2.0132 - val_accuracy: 0.0219
Epoch 2/15
154/154 [==============================] - 12s 78ms/step - loss: 1.0577 - accuracy: 0.4177 - val_loss: 2.0314 - val_accuracy: 0.0000e+00
Epoch 3/15
154/154 [==============================] - 12s 76ms/step - loss: 1.0457 - accuracy: 0.4154 - val_loss: 1.6968 - val_accuracy: 0.0000e+00
Epoch 4/15
154/154 [==============================] - 12s 76ms/step - loss: 1.0506 - accuracy: 0.4161 - val_loss: 1.7731 - val_accuracy: 0.0000e+00
Epoch 5/15
154/154 [==============================] - 11s 73ms/step - loss: 1.0511 - accuracy: 0.4313 - val_loss: 1.9052 - val_accuracy: 0.0000e+00
Epoch 6/15
154/154 [==============================] - 12s 75ms/step - loss: 1.0473 - accuracy: 0.4104 - val_loss: 1.6291 - val_accuracy: 0.0000e+00
Epoch 7/15
154/154 [==============================] - 13s 84ms/step - loss: 1.0464 - accuracy: 0.4135 - val_loss: 1.8916 - val_accuracy: 0.0000e+00
Epoch 8/15
154/154 [==============================] - 12s 76ms/step - loss: 1.0404 - accuracy: 0.4208 - val_loss: 1.8094 - val_accuracy: 0.0000e+00
Epoch 9/15
154/154 [==============================] - 12s 76ms/step - loss: 1.0449 - accuracy: 0.4096 - val_loss: 1.9690 - val_accuracy: 0.0219
Epoch 10/15
154/154 [==============================] - 12s 77ms/step - loss: 1.0489 - accuracy: 0.4104 - val_loss: 1.9596 - val_accuracy: 0.0000e+00
Epoch 11/15
154/154 [==============================] - 13s 83ms/step - loss: 1.0455 - accuracy: 0.4141 - val_loss: 1.8082 - val_accuracy: 0.0000e+00
Epoch 12/15
154/154 [==============================] - 12s 76ms/step - loss: 1.0465 - accuracy: 0.4219 - val_loss: 1.7066 - val_accuracy: 0.0000e+00
Epoch 13/15
154/154 [==============================] - 12s 75ms/step - loss: 1.0424 - accuracy: 0.4161 - val_loss: 1.5192 - val_accuracy: 0.0000e+00
Epoch 14/15
154/154 [==============================] - 12s 75ms/step - loss: 1.0481 - accuracy: 0.4154 - val_loss: 1.5999 - val_accuracy: 0.0000e+00
Epoch 15/15
154/154 [==============================] - 12s 77ms/step - loss: 1.0476 - accuracy: 0.4008 - val_loss: 2.0612 - val_accuracy: 0.0000e+00 

The strange thing is that if I increase the validation split, the results improve. This makes no sense to me, since the model then has less training data.

This is with validation_split = 0.6:

77/77 [==============================] - 8s 103ms/step - loss: 1.0352 - accuracy: 0.4432 - val_loss: 1.4233 - val_accuracy: 0.2313
Epoch 2/15
77/77 [==============================] - 7s 93ms/step - loss: 0.9906 - accuracy: 0.4443 - val_loss: 1.7316 - val_accuracy: 0.2937
Epoch 3/15
77/77 [==============================] - 7s 92ms/step - loss: 0.9863 - accuracy: 0.4812 - val_loss: 1.5367 - val_accuracy: 0.2313
Epoch 4/15
77/77 [==============================] - 7s 94ms/step - loss: 0.9874 - accuracy: 0.4635 - val_loss: 1.4075 - val_accuracy: 0.2937
Epoch 5/15
77/77 [==============================] - 7s 93ms/step - loss: 0.9905 - accuracy: 0.4594 - val_loss: 1.5759 - val_accuracy: 0.2937
Epoch 6/15
77/77 [==============================] - 7s 93ms/step - loss: 0.9808 - accuracy: 0.4703 - val_loss: 1.3886 - val_accuracy: 0.2937
Epoch 7/15
77/77 [==============================] - 7s 96ms/step - loss: 0.9815 - accuracy: 0.4781 - val_loss: 1.2495 - val_accuracy: 0.2313
Epoch 8/15
77/77 [==============================] - 7s 96ms/step - loss: 0.9824 - accuracy: 0.4698 - val_loss: 1.4516 - val_accuracy: 0.2313
Epoch 9/15
77/77 [==============================] - 7s 92ms/step - loss: 0.9916 - accuracy: 0.4573 - val_loss: 1.4488 - val_accuracy: 0.2313
Epoch 10/15
77/77 [==============================] - 7s 90ms/step - loss: 0.9858 - accuracy: 0.4760 - val_loss: 1.3868 - val_accuracy: 0.2313
Epoch 11/15
77/77 [==============================] - 7s 93ms/step - loss: 0.9861 - accuracy: 0.4734 - val_loss: 1.5702 - val_accuracy: 0.2313
Epoch 12/15
77/77 [==============================] - 7s 91ms/step - loss: 0.9880 - accuracy: 0.4630 - val_loss: 1.4439 - val_accuracy: 0.2313
Epoch 13/15
77/77 [==============================] - 7s 91ms/step - loss: 0.9796 - accuracy: 0.4865 - val_loss: 1.3597 - val_accuracy: 0.2313
Epoch 14/15
77/77 [==============================] - 7s 91ms/step - loss: 0.9832 - accuracy: 0.4745 - val_loss: 1.5791 - val_accuracy: 0.2313
Epoch 15/15
77/77 [==============================] - 7s 90ms/step - loss: 0.9919 - accuracy: 0.4760 - val_loss: 1.6243 - val_accuracy: 0.2313

And with validation_split = 0.8:

39/39 [==============================] - 7s 171ms/step - loss: 1.1238 - accuracy: 0.4484 - val_loss: 1.3041 - val_accuracy: 0.3158
Epoch 2/15
39/39 [==============================] - 6s 143ms/step - loss: 0.9795 - accuracy: 0.4692 - val_loss: 1.2562 - val_accuracy: 0.3174
Epoch 3/15
39/39 [==============================] - 6s 146ms/step - loss: 0.9757 - accuracy: 0.4724 - val_loss: 1.3583 - val_accuracy: 0.3437
Epoch 4/15
39/39 [==============================] - 6s 149ms/step - loss: 0.9741 - accuracy: 0.4703 - val_loss: 1.3565 - val_accuracy: 0.2976
Epoch 5/15
39/39 [==============================] - 6s 148ms/step - loss: 0.9748 - accuracy: 0.4578 - val_loss: 1.3904 - val_accuracy: 0.2976
Epoch 6/15
39/39 [==============================] - 5s 137ms/step - loss: 0.9697 - accuracy: 0.4755 - val_loss: 1.3418 - val_accuracy: 0.2976
Epoch 7/15
39/39 [==============================] - 5s 136ms/step - loss: 0.9716 - accuracy: 0.4765 - val_loss: 1.3053 - val_accuracy: 0.3262
Epoch 8/15
39/39 [==============================] - 5s 136ms/step - loss: 0.9748 - accuracy: 0.4557 - val_loss: 1.3529 - val_accuracy: 0.2976
Epoch 9/15
39/39 [==============================] - 5s 140ms/step - loss: 0.9768 - accuracy: 0.4505 - val_loss: 1.3260 - val_accuracy: 0.2976
Epoch 10/15
39/39 [==============================] - 5s 136ms/step - loss: 0.9724 - accuracy: 0.4859 - val_loss: 1.3351 - val_accuracy: 0.3627
Epoch 11/15
39/39 [==============================] - 6s 143ms/step - loss: 0.9748 - accuracy: 0.4588 - val_loss: 1.3203 - val_accuracy: 0.3770
Epoch 12/15
39/39 [==============================] - 6s 144ms/step - loss: 0.9690 - accuracy: 0.4640 - val_loss: 1.3207 - val_accuracy: 0.3517
Epoch 13/15
39/39 [==============================] - 5s 137ms/step - loss: 0.9661 - accuracy: 0.4369 - val_loss: 1.3153 - val_accuracy: 0.3681
Epoch 14/15
39/39 [==============================] - 6s 141ms/step - loss: 0.9628 - accuracy: 0.4661 - val_loss: 1.3405 - val_accuracy: 0.2976
Epoch 15/15
39/39 [==============================] - 5s 137ms/step - loss: 0.9625 - accuracy: 0.4703 - val_loss: 1.3586 - val_accuracy: 0.3457
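
For reference, fit() takes the validation set from the last samples of the data before any shuffling, so a skewed tail of the dataset can distort val_accuracy. A manual stratified split is one way to rule that out; a minimal sketch using sklearn's train_test_split (an assumed alternative, not part of my original code):

from sklearn.model_selection import train_test_split

# stratified 80/20 split; class labels are recovered with argmax
# because y_train is one-hot encoded
x_tr, x_val, y_tr, y_val = train_test_split(
    x_train, y_train, test_size=0.2,
    stratify=y_train.argmax(axis=1), random_state=42)

model.fit(x=x_tr, y=y_tr, epochs=15, shuffle=True,
          batch_size=25, validation_data=(x_val, y_val))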

I have tried using only dense layers, and the results were much better, which makes no sense to me because dense layers cannot understand sequences. However, this rules out the possibility that the dataset is bad.

With this configuration (validation_split back to 0.2):

model = keras.Sequential()

model.add(layers.Input(shape=(500, 57)))

model.add(layers.Flatten())

model.add(layers.Dense(256, activation="relu"))

model.add(layers.Dense(128, activation="relu"))

model.add(layers.Dense(64, activation="relu"))

model.add(layers.Dense(3, activation="softmax"))

optimizer = keras.optimizers.Adam(lr=0.01)

model.compile(optimizer=optimizer, loss="categorical_crossentropy",
              metrics=["accuracy"])

I get these results:

154/154 [==============================] - 1s 6ms/step - loss: 0.7377 - accuracy: 0.7844 - val_loss: 1.4061 - val_accuracy: 0.0250
Epoch 2/15
154/154 [==============================] - 1s 4ms/step - loss: 0.3479 - accuracy: 0.8448 - val_loss: 0.8703 - val_accuracy: 0.6927
Epoch 3/15
154/154 [==============================] - 1s 4ms/step - loss: 0.3033 - accuracy: 0.8794 - val_loss: 1.4597 - val_accuracy: 0.6938
Epoch 4/15
154/154 [==============================] - 1s 4ms/step - loss: 0.2899 - accuracy: 0.8966 - val_loss: 1.6684 - val_accuracy: 0.4896
Epoch 5/15
154/154 [==============================] - 1s 4ms/step - loss: 0.2447 - accuracy: 0.9042 - val_loss: 1.6465 - val_accuracy: 0.4812
Epoch 6/15
154/154 [==============================] - 1s 4ms/step - loss: 0.2269 - accuracy: 0.9211 - val_loss: 3.9954 - val_accuracy: 0.7312
Epoch 7/15
154/154 [==============================] - 1s 4ms/step - loss: 0.2071 - accuracy: 0.9201 - val_loss: 2.7729 - val_accuracy: 0.4698
Epoch 8/15
154/154 [==============================] - 1s 4ms/step - loss: 0.2081 - accuracy: 0.9302 - val_loss: 5.1325 - val_accuracy: 0.4229
Epoch 9/15
154/154 [==============================] - 1s 4ms/step - loss: 0.1581 - accuracy: 0.9378 - val_loss: 4.4410 - val_accuracy: 0.3688
Epoch 10/15
154/154 [==============================] - 1s 4ms/step - loss: 0.2184 - accuracy: 0.9333 - val_loss: 2.6669 - val_accuracy: 0.5396
Epoch 11/15
154/154 [==============================] - 1s 4ms/step - loss: 0.1673 - accuracy: 0.9341 - val_loss: 3.6476 - val_accuracy: 0.2750
Epoch 12/15
154/154 [==============================] - 1s 4ms/step - loss: 0.2111 - accuracy: 0.9443 - val_loss: 1.6768 - val_accuracy: 0.6885
Epoch 13/15
154/154 [==============================] - 1s 4ms/step - loss: 0.1210 - accuracy: 0.9547 - val_loss: 2.6785 - val_accuracy: 0.5406
Epoch 14/15
154/154 [==============================] - 1s 4ms/step - loss: 0.1179 - accuracy: 0.9542 - val_loss: 3.4468 - val_accuracy: 0.4385
Epoch 15/15
154/154 [==============================] - 1s 4ms/step - loss: 0.1265 - accuracy: 0.9469 - val_loss: 2.0159 - val_accuracy: 0.7083

The model's val_accuracy varies quite a lot, but at least I know it is learning something.
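
One way to cope with a val_accuracy that jumps around like this is to keep the weights from the best epoch rather than the last one; a minimal sketch, assuming keras.callbacks.EarlyStopping (the patience value is a placeholder):

# stop when val_accuracy stops improving and roll back to the best epoch
callbacks = [
    keras.callbacks.EarlyStopping(monitor="val_accuracy", patience=5,
                                  restore_best_weights=True),
]
model.fit(x=x_train, y=y_train, epochs=15, shuffle=True,
          batch_size=25, validation_split=0.2, callbacks=callbacks)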

Things I have tried (with the LSTM model):

- Changing the number of layers
- Changing the number of neurons in each layer
- Changing the learning rate
- Switching the optimizer to SGD
- Changing the loss function
- Changing the number of epochs
- Changing the number of training examples (duplicating each example)
- Using decay in Adam (see the sketch after this list)
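
The decay experiment was wired along these lines (a sketch with placeholder values, not the exact code I ran; both variants are standard TF 2.x APIs):

from tensorflow import keras

# option 1: the legacy `decay` argument accepted by TF 2.x optimizers
optimizer = keras.optimizers.Adam(lr=0.01, decay=1e-4)

# option 2: an explicit learning-rate schedule
lr_schedule = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.01, decay_steps=1000, decay_rate=0.9)
optimizer = keras.optimizers.Adam(learning_rate=lr_schedule)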

Tags: python, tensorflow, keras, lstm, text-classification

Solution


I see two problems here:

  • LSTMs do not work well on one-hot encoded input. Use padded integer sequences instead (e.g., [5, 6, 8]).
  • You need an Embedding layer before the LSTM layers.

Changing only the data (and the loss function), I made an example based on your architecture (with an Embedding layer added):

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import sys

np.set_printoptions(threshold=sys.maxsize)

X_train = ['They like my dog', 'I hate my cat', 'We will love my hamster', 
           'I dislike your llama']
X_test = ['We love our hamster', 'They hate our platypus']
y_train = [1, 0, 1, 0]
y_test = [1, 0]

labels = {0: 'negative', 1: 'positive'}

# word-level tokenizer: maps each word to an integer id
encoder = keras.preprocessing.text.Tokenizer()

encoder.fit_on_texts(X_train)

X_train = encoder.texts_to_sequences(X_train)
X_test = encoder.texts_to_sequences(X_test)

max_length = max(map(len, X_train))

# pad every sequence to the length of the longest training sentence
x_train = keras.preprocessing.sequence.pad_sequences(X_train, maxlen=max_length)
x_test = keras.preprocessing.sequence.pad_sequences(X_test, maxlen=max_length)

x_train = np.array(x_train)
x_test = np.array(x_test)
y_train = np.array(y_train)
y_test = np.array(y_test)

embedding_dim = 4
# print(x_train.shape[1], x_train.shape[2])
print(x_train.shape)
print(y_train.shape)

# Create the model
model = keras.Sequential()

# the Embedding layer turns integer ids into dense, trainable vectors
model.add(layers.Embedding(len(encoder.index_word) + 1, embedding_dim))

model.add(layers.LSTM(8, activation="tanh",
                      return_sequences=True, dropout=.2))

model.add(layers.LSTM(8, activation="tanh",
                      return_sequences=False, dropout=.2))

model.add(layers.Dense(2, activation="softmax"))

optimizer = keras.optimizers.Adam(lr=0.01)

model.compile(optimizer=optimizer, loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])

model.build(input_shape=x_train.shape)
model.summary()

history = model.fit(x=x_train, y=y_train, epochs=25, shuffle=True,
          batch_size=25, validation_data=(x_test, y_test))

Let me know if anything needs clarification.
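
If you want to keep the character-level setup from your question, the same recipe applies; a minimal sketch, assuming Tokenizer(char_level=True) and illustrative, untuned layer sizes:

import numpy as np
from tensorflow import keras
from tensorflow.keras import layers

# raw message strings with one-hot labels, mirroring the question's data.json
X_train = ["hello chat how are you", "BUY CHEAP FOLLOWERS NOW", "some rude message"]
y_train = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])

# char_level=True assigns an integer id to every character instead of every word
encoder = keras.preprocessing.text.Tokenizer(char_level=True)
encoder.fit_on_texts(X_train)

# pad to 500, the maximum Twitch message length used in the question
x_train = keras.preprocessing.sequence.pad_sequences(
    encoder.texts_to_sequences(X_train), maxlen=500)

model = keras.Sequential([
    layers.Embedding(len(encoder.index_word) + 1, 16),  # small dims suffice for chars
    layers.LSTM(64, return_sequences=True, dropout=0.2),
    layers.LSTM(32, dropout=0.2),
    layers.Dense(3, activation="softmax"),  # normal / spam / offensive
])

# categorical_crossentropy because the labels here stay one-hot encoded
model.compile(optimizer=keras.optimizers.Adam(lr=0.01),
              loss="categorical_crossentropy",
              metrics=["accuracy"])

model.fit(x_train, y_train, epochs=5, batch_size=25)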

