Question about the performance of simple Keras and TensorFlow code

Problem description

I wrote a simple sin-function predictor with an LSTM, once using Keras and once using TensorFlow directly, but I found that the Keras code performs much worse: it takes about 5 minutes to run, while the TensorFlow code trains the model in only 20 seconds. On top of that, the Keras predictions are less precise than the TensorFlow ones. Can anyone help me find the differences between the two models' code?

I pieced the code together from examples I found online and intended to train both models with the same hyperparameters, but the performance is not what I expected. I searched quite a bit and could not find the cause.

Keras code:

import numpy as np
import os
import sys
import time
from tqdm._tqdm_notebook import tqdm_notebook
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger
from keras import optimizers
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

stime = time.time()
BATCH_SIZE = 20
TIME_STEPS = 10
LN = 410
DIFF = 2
OUTPUT_PATH = '/Users/xiachang/Documents/RNN/test_outputs'
SCALER_COL_IDX = 0

params = {
    "batch_size": BATCH_SIZE,  # 20<16<10, 25 was a bust
    "epochs": 500,
    "lr": 0.00010000,
    "time_steps": TIME_STEPS
}

TRAINING_EXAMPLES = 10000
TESTING_EXAMPLES = 1000
SAMPLE_GAP = 0.01
HIDDEN_UNITS = 20

# data = np.array([[i * (DIFF)] for i in range(LN)])
#
# min_max_scaler = MinMaxScaler()
# data = min_max_scaler.fit_transform(data)


def generate_data(seq):
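    # Slide a window of length TIME_STEPS over seq: each X sample is the window
    # (one feature per time step, hence the inner [e] wrapping) and y is the
    # value immediately after it.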
    X = []
    y = []
    for i in range(len(seq) - TIME_STEPS):
        X.append([[e] for e in seq[i: i + TIME_STEPS]])
        y.append([seq[i + TIME_STEPS]])
    return np.array(X, dtype=np.float32), np.array(y, dtype=np.float32)


test_start = (TRAINING_EXAMPLES + TIME_STEPS) * SAMPLE_GAP + 1
test_end = test_start + (TESTING_EXAMPLES + TIME_STEPS) * SAMPLE_GAP + 1
train_X, train_y = generate_data(np.sin(np.linspace(
    0, test_start, TRAINING_EXAMPLES + TIME_STEPS, dtype=np.float32)))
test_X, test_y = generate_data(np.sin(np.linspace(
    test_start, test_end, TESTING_EXAMPLES + TIME_STEPS, dtype=np.float32)))

x_val, x_test = np.split(test_X, 2)
y_val, y_test = np.split(test_y, 2)


def print_time(text, stime):
    seconds = time.time() - stime
    print(text, seconds // 60, "minutes :", np.round(seconds % 60), "seconds")


def create_model():
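    # Three stacked LSTM layers; the first two return the full sequence so the
    # next LSTM sees every time step, the last returns only its final output.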
    lstm_model = Sequential()
    lstm_model.add(LSTM(HIDDEN_UNITS, return_sequences=True,
                        input_shape=(TIME_STEPS, 1)))  # explicit input shape: (time steps, features)
    lstm_model.add(LSTM(HIDDEN_UNITS, return_sequences=True))
    lstm_model.add(LSTM(HIDDEN_UNITS))
    lstm_model.add(Dense(1, activation=None))
    lstm_model.compile(loss='mean_squared_error', optimizer=optimizers.Adagrad(lr=0.1))
    return lstm_model


model = create_model()

es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=30, min_delta=0.0001)

mcp = ModelCheckpoint(os.path.join(OUTPUT_PATH,
                      "best_model.h5"), monitor='val_loss', verbose=1,
                      save_best_only=True, save_weights_only=False, mode='min', period=1)

# Not used here. But leaving it here as a reminder for future
r_lr_plat = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=30,
                              verbose=0, mode='auto', min_delta=0.0001, cooldown=0, min_lr=0)

csv_logger = CSVLogger(os.path.join(OUTPUT_PATH, 'training_log_' + time.ctime().replace(" ","_") + '.log'), append=True)

history = model.fit(train_X, train_y, epochs=params["epochs"], verbose=2, batch_size=BATCH_SIZE,
                    shuffle=False, validation_data=(x_val, y_val), callbacks=[es, mcp, csv_logger])

print("saving model...")
# Keras models are not reliably picklable; use the built-in save method instead
model.save(os.path.join(OUTPUT_PATH, "lstm_model.h5"))

# Visualize the training data
from matplotlib import pyplot as plt
plt.figure()
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
#plt.show()
plt.savefig(os.path.join(OUTPUT_PATH, 'train_vis_BS_'+str(BATCH_SIZE)+"_"+time.ctime()+'.png'))

# load the saved best model from above
saved_model = load_model(os.path.join(OUTPUT_PATH, 'best_model.h5')) # , "lstm_best_7-3-19_12AM",
print(saved_model)

y_pred = saved_model.predict(x_test, batch_size=BATCH_SIZE)
y_pred = y_pred.flatten()
y_test_t = y_test
error = mean_squared_error(y_test_t, y_pred)
print("Error is", error, y_pred.shape, y_test_t.shape)
print(y_pred[0:15])
print(y_test_t[0:15])
y_pred_org = y_pred
y_test_t_org = y_test_t

# Visualize the prediction
from matplotlib import pyplot as plt
plt.figure()
plt.plot(y_pred_org)
plt.plot(y_test_t_org)
plt.title('Prediction vs Real Value')
plt.ylabel('Y')
plt.xlabel('X')
plt.legend(['Prediction', 'Real'], loc='upper left')
# plt.show()
plt.savefig(os.path.join(OUTPUT_PATH, 'pred_vs_real_BS'+str(BATCH_SIZE)+"_"+time.ctime()+'.png'))
print_time("program completed ", stime)

TensorFlow code:

import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt


NUM_EPOCH = 1000
HIDDEN_SIZE = 30
NUM_LAYERS = 2
TIMESTEPS = 10
TRAINING_STEPS = 10000
BATCH_SIZE = 20
TRAINING_EXAMPLES = 10000
TESTING_EXAMPLES = 1000
SAMPLE_GAP = 0.01


def generate_data(seq):
    X = []
    y = []
    for i in range(len(seq) - TIMESTEPS):
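        # Note: wrapping the window as [seq[i: i + TIMESTEPS]] gives X shape
        # (N, 1, TIMESTEPS), i.e. one time step with TIMESTEPS features, whereas
        # the Keras generate_data above builds (N, TIME_STEPS, 1); the two models
        # therefore do not see the same input layout.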
        X.append([seq[i: i + TIMESTEPS]])
        y.append([seq[i + TIMESTEPS]])
    return np.array(X, dtype=np.float32), np.array(y, dtype=np.float32)


def lstm_model(X, y, is_training):
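    # Stack NUM_LAYERS LSTM cells, unroll them with dynamic_rnn, and regress the
    # output of the last time step to a single value through a linear layer.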
    cell = tf.nn.rnn_cell.MultiRNNCell([tf.nn.rnn_cell.LSTMCell(HIDDEN_SIZE) for _ in range(NUM_LAYERS)])
    outputs, _ = tf.nn.dynamic_rnn(cell, X, dtype=tf.float32)
    output = outputs[:, -1, :]
    predictions = tf.contrib.layers.fully_connected(output, 1, activation_fn=None)
    if not is_training:
        return predictions, None, None
    loss = tf.losses.mean_squared_error(labels=y, predictions=predictions)
    train_op = tf.contrib.layers.optimize_loss(
        loss, tf.train.get_global_step(), optimizer='Adagrad', learning_rate=0.1)
    return predictions, loss, train_op


def train(sess, train_X, train_Y):
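    # Build an input pipeline that repeats and shuffles the training set, then
    # run TRAINING_STEPS optimization steps (not epochs) on mini-batches.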
    ds = tf.data.Dataset.from_tensor_slices((train_X, train_Y))
    ds = ds.repeat().shuffle(1000).batch(BATCH_SIZE)
    X, y = ds.make_one_shot_iterator().get_next()
    losses = np.array([])

    with tf.variable_scope('model'):
        predictions, loss, train_op = lstm_model(X, y, True)
    sess.run(tf.global_variables_initializer())
    for i in range(TRAINING_STEPS):
        _, l = sess.run([train_op, loss])
        losses = np.append(losses, l)
        if i % NUM_EPOCH == 0:
            print('train step: ' + str(i) + ', loss: ' + str(l))

    plt.figure()
    plt.plot(losses, label='loss')
    plt.legend()
    # plt.show()
    plt.savefig('./test_outputs/loss.png')


def run_eval(sess, test_X, test_y):
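    # Reuse the trained weights (variable_scope with reuse=True) and predict the
    # test set one example at a time.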
    ds = tf.data.Dataset.from_tensor_slices((test_X, test_y))
    ds = ds.batch(1)
    X, y = ds.make_one_shot_iterator().get_next()
    with tf.variable_scope('model', reuse=True):
        prediction, _, _ = lstm_model(X, [0, 0], False)
    predictions = []
    labels = []
    for i in range(int(TESTING_EXAMPLES / 2)):
        p, l = sess.run([prediction, y])
        predictions.append(p)
        labels.append(l)

    predictions = np.array(predictions).squeeze()
    labels = np.array(labels).squeeze()
    rmse = np.sqrt(((predictions - labels) ** 2).mean(axis=0))
    print('Root Mean Square Error is: %f' % rmse)

    plt.figure()
    print(predictions[:15])
    print(labels[:15])
    plt.plot(predictions, label='predictions')
    plt.plot(labels, label='real_val')
    plt.legend()
    # plt.show()
    plt.savefig('./test_outputs/test.png')


test_start = (TRAINING_EXAMPLES + TIMESTEPS) * SAMPLE_GAP + 1
test_end = test_start + (TESTING_EXAMPLES + TIMESTEPS) * SAMPLE_GAP + 1
train_X, train_y = generate_data(np.sin(np.linspace(
    0, test_start, TRAINING_EXAMPLES + TIMESTEPS, dtype=np.float32)))
test_X, test_y = generate_data(np.sin(np.linspace(
    test_start, test_end, TESTING_EXAMPLES + TIMESTEPS, dtype=np.float32)))

x_val, test_X = np.split(test_X, 2)
y_val, test_y = np.split(test_y, 2)

with tf.Session() as sess:
    train(sess, train_X, train_y)
    run_eval(sess, test_X, test_y)

Tags: tensorflow, keras, lstm, recurrent-neural-network

Solution


You could try using CuDNNLSTM instead of LSTM. It is the CUDA-accelerated, "fast LSTM implementation backed by CuDNN".

See here: https://github.com/keras-team/keras/blob/master/keras/layers/cudnn_recurrent.py#L328
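
For reference, a minimal sketch of what create_model from the Keras code above could look like with the CuDNN-backed layer swapped in (assuming Keras 2.x running on a CUDA-capable GPU; note that CuDNNLSTM takes no activation argument, as its activations are fixed to the CuDNN defaults):

from keras.models import Sequential
from keras.layers import Dense, CuDNNLSTM
from keras import optimizers

def create_model():
    # CuDNNLSTM runs only on GPU; it exposes fewer options than the plain
    # LSTM layer but is typically much faster.
    lstm_model = Sequential()
    lstm_model.add(CuDNNLSTM(HIDDEN_UNITS, return_sequences=True,
                             input_shape=(TIME_STEPS, 1)))
    lstm_model.add(CuDNNLSTM(HIDDEN_UNITS, return_sequences=True))
    lstm_model.add(CuDNNLSTM(HIDDEN_UNITS))
    lstm_model.add(Dense(1, activation=None))
    lstm_model.compile(loss='mean_squared_error',
                       optimizer=optimizers.Adagrad(lr=0.1))
    return lstm_model

Keep in mind that a CuDNNLSTM model also needs a GPU at inference time, so the load_model/predict steps above would have to run on a GPU as well.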

