Why does the Bayesian optimization package in Scikit-Optimize give different results each time?

Problem description

I am using the Scikit-optimize package to run a hyperparameter optimization task on an LSTM, using its gp_minimize function. The task is sales forecasting for a single store. When I run the optimization twice, the two runs give different results, even though I am setting the TensorFlow seed, the NumPy seed, and the seed in gp_minimize. I don't understand what the problem could be.

My code is shown below. Any help is greatly appreciated.
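For context, the seeding described above boils down to the calls in the minimal, self-contained sketch below (assuming TensorFlow 1.x and scikit-optimize). The toy_objective function and the single Real dimension are placeholders added purely for illustration; they are not part of my actual script, which follows in full.

import numpy as np
import tensorflow as tf
from skopt import gp_minimize
from skopt.space import Real

randomState = 46
np.random.seed(randomState)      # seed NumPy's global RNG
tf.set_random_seed(randomState)  # graph-level seed for the current default graph

# Hypothetical stand-in objective, only to show where the seeds are passed.
def toy_objective(params):
    x = params[0]
    return (x - 2.0) ** 2

search_result = gp_minimize(func=toy_objective,
                            dimensions=[Real(-5.0, 5.0)],
                            acq_func='EI',
                            n_calls=15,
                            random_state=randomState)  # seed the optimizer itself
print(search_result.x, search_result.fun)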

import skopt
from sklearn.ensemble import RandomForestRegressor
from skopt import gp_minimize, dump
from skopt.space import Real, Categorical, Integer
from skopt.plots import plot_convergence
from skopt.plots import plot_objective, plot_evaluations
from skopt.utils import use_named_args
import matplotlib as mplt
from xgboost import XGBRegressor
mplt.use('agg')  # Must be before importing matplotlib.pyplot or pylab!
import matplotlib.pyplot as plt
import csv
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from math import sqrt
import atexit
from time import time, strftime, localtime
from datetime import timedelta
from sklearn.metrics import mean_absolute_error



randomState = 46
np.random.seed(randomState)
tf.set_random_seed(randomState)
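# Note: np.random.seed seeds NumPy's global RNG; tf.set_random_seed sets the
# graph-level seed on the current default graph.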

input_size = 1
num_layers = 1
columns = ['Sales', 'DayOfWeek', 'SchoolHoliday', 'Promo','lagged_Open','lagged_promo','lagged_SchoolHoliday']
features = len(columns)
fileName = None
column_min_max = None
Error_file_name = None
Error_plot_name = None

fileNames = ['store165_1']
column_min_max_all = [[[0, 9000], [1, 7]]]


num_steps = None
lstm_size = None
batch_size = None
init_learning_rate = None
learning_rate_decay = None
init_epoch = None  # 5
max_epoch = None  # 100 or 50
hidden1_nodes = None
hidden2_nodes = None
dropout_rate = None
hidden1_activation = None
hidden2_activation = None
lstm_activation = None

lowest_error = 0.0
start = None
iteration = 0
bestTestPrediction = None
bestValiPrediction = None
bestTestTrueVal = None
bestValiTrueVal = None

lstm_num_steps = Integer(low=2, high=14, name='lstm_num_steps')
size = Integer(low=8, high=128, name='size')
lstm_hidden1_nodes = Integer(low=4, high=64, name='lstm_hidden1_nodes')
lstm_hidden2_nodes = Integer(low=2, high=32, name='lstm_hidden2_nodes')
lstm_learning_rate_decay = Real(low=0.7, high=0.99, prior='uniform', name='lstm_learning_rate_decay')
lstm_max_epoch = Integer(low=60, high=200, name='lstm_max_epoch')
lstm_init_epoch = Integer(low=5, high=50, name='lstm_init_epoch')
lstm_batch_size = Integer(low=5, high=64, name='lstm_batch_size')
lstm_dropout_rate = Real(low=0.1, high=0.9, prior='uniform', name='lstm_dropout_rate')
lstm_init_learning_rate = Real(low=1e-4, high=1e-1, prior='log-uniform', name='lstm_init_learning_rate')
lstm_hidden1_activation = Categorical(categories=[tf.nn.relu, tf.nn.tanh], name='lstm_hidden1_activation')
lstm_hidden2_activation = Categorical(categories=[tf.nn.relu, tf.nn.tanh], name='lstm_hidden2_activation')
lstm_lstm_activation = Categorical(categories=[tf.nn.relu, tf.nn.tanh], name='lstm_lstm_activation')

dimensions = [lstm_num_steps, size, lstm_hidden1_nodes, lstm_hidden2_nodes, lstm_init_epoch, lstm_max_epoch,
              lstm_learning_rate_decay, lstm_batch_size, lstm_dropout_rate, lstm_init_learning_rate,
              lstm_hidden1_activation, lstm_hidden2_activation, lstm_lstm_activation]

default_parameters = [5, 35, 30, 15, 5, 60, 0.99, 8, 0.1, 0.01, tf.nn.relu, tf.nn.relu, tf.nn.relu]



def secondsToStr(elapsed=None):
    if elapsed is None:
        return strftime("%Y-%m-%d %H:%M:%S", localtime())
    else:
        return str(timedelta(seconds=elapsed))


def log(s, elapsed=None):
    line = "=" * 40
    print(line)
    print(secondsToStr(), '-', s)
    if elapsed:
        print("Elapsed time:", elapsed)
    print(line)
    print()


def endlog():
    end = time()
    elapsed = end - start
    log("End Program", secondsToStr(elapsed))


def plot():
    fig = plt.figure(dpi=100, figsize=(20, 7))
    error_vals = pd.read_csv(Error_file_name, header=None)

    iterations = range(len(error_vals.iloc[:, 0]))
    values = error_vals.iloc[:, 0].values
    plt.plot(iterations, values, label='RMSE')
    plt.legend(loc='upper left', frameon=False)
    plt.xlabel("Iteration")
    plt.ylabel("RMSE")
    plt.grid(ls='--')
    plt.savefig(Error_plot_name, format='png', bbox_inches='tight', transparent=False)
    plt.close()


def generate_batches(train_X, train_y, batch_size):
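    # Yield the training set in order as mini-batches; the final batch may be
    # smaller than batch_size when len(train_X) is not a multiple of it.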
    num_batches = int(len(train_X)) // batch_size
    if batch_size * num_batches < len(train_X):
        num_batches += 1

    batch_indices = range(num_batches)
    for j in batch_indices:
        batch_X = train_X[j * batch_size: (j + 1) * batch_size]
        batch_y = train_y[j * batch_size: (j + 1) * batch_size]
        # assert set(map(len, batch_X)) == {num_steps}
        yield batch_X, batch_y


def segmentation(data):
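    # Turn the feature columns into sliding windows of num_steps rows (X) and
    # the Sales value of the row that follows each window (y).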
    seq = [price for tup in data[columns].values for price in tup]

    seq = np.array(seq)

    # split into items of features
    seq = [np.array(seq[i * features: (i + 1) * features])
           for i in range(len(seq) // features)]

    # split into groups of num_steps
    X = np.array([seq[i: i + num_steps] for i in range(len(seq) - num_steps)])

    y = np.array([seq[i + num_steps] for i in range(len(seq) - num_steps)])

    # get only sales value
    y = [[y[i][0]] for i in range(len(y))]

    y = np.asarray(y)

    return X, y


def scale(data):
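    # Min-max scale each column that has bounds in column_min_max into [0, 1].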
    for i in range(len(column_min_max)):
        data[columns[i]] = (data[columns[i]] - column_min_max[i][0]) / ((column_min_max[i][1]) - (column_min_max[i][0]))

    return data


def rescle(test_pred):
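    # Invert the Sales min-max scaling so predictions are back on the original scale.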
    prediction = [(pred * (column_min_max[0][1] - column_min_max[0][0])) + column_min_max[0][0] for pred in test_pred]

    return prediction


def mean_absolute_percentage_error(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    itemindex = np.where(y_true == 0)
    y_true = np.delete(y_true, itemindex)
    y_pred = np.delete(y_pred, itemindex)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

def RMSPE(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    itemindex = np.where(y_true == 0)
    y_true = np.delete(y_true, itemindex)
    y_pred = np.delete(y_pred, itemindex)
    return np.sqrt(np.mean(np.square(((y_true - y_pred) / y_true)), axis=0))


def pre_process():
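    # Split the store data chronologically (assuming the CSV rows are ordered by date):
    # June 2015 rows form the validation set, July 2015 rows the test set, and everything
    # earlier the training set; num_steps extra rows are kept in front of the validation
    # and test splits as window context.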
    store_data = pd.read_csv(fileName)
    store_data['lagged_Open'] = store_data['lagged_Open'].astype(int)
    store_data['lagged_promo'] = store_data['lagged_promo'].astype(int)
    store_data['lagged_SchoolHoliday'] = store_data['lagged_SchoolHoliday'].astype(int)

    # sftp://wso2@192.168.32.11/home/wso2/suleka/salesPred/store2_1_original.csv

    # store_data = store_data.drop(store_data[(store_data.Open == 0) & (store_data.Sales == 0)].index)
    #
    # store_data = store_data.drop(store_data[(store_data.Open != 0) & (store_data.Sales == 0)].index)

    # ---for segmenting original data --------------------------------
    original_data = store_data.copy()

    ## train_size = int(len(store_data) * (1.0 - test_ratio))

    validation_len = len(store_data[(store_data.Month == 6) & (store_data.Year == 2015)].index)
    test_len = len(store_data[(store_data.Month == 7) & (store_data.Year == 2015)].index)
    train_size = int(len(store_data) - (validation_len + test_len))

    train_data = store_data[:train_size]
    validation_data = store_data[(train_size - num_steps): validation_len + train_size]
    test_data = store_data[((validation_len + train_size) - num_steps):]
    original_val_data = validation_data.copy()
    original_test_data = test_data.copy()

    # -------------- processing train data---------------------------------------
    scaled_train_data = scale(train_data)
    train_X, train_y = segmentation(scaled_train_data)

    # -------------- processing validation data---------------------------------------
    scaled_validation_data = scale(validation_data)
    val_X, val_y = segmentation(scaled_validation_data)

    # -------------- processing test data---------------------------------------
    scaled_test_data = scale(test_data)
    test_X, test_y = segmentation(scaled_test_data)

    # ----segmenting original validation data-----------------------------------------------
    nonescaled_val_X, nonescaled_val_y = segmentation(original_val_data)

    # ----segmenting original test data-----------------------------------------------
    nonescaled_test_X, nonescaled_test_y = segmentation(original_test_data)

    return train_X, train_y, test_X, test_y, val_X, val_y, nonescaled_test_y, nonescaled_val_y


def setupRNN(inputs,model_dropout_rate):
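    # Build the network: one peephole LSTM layer, the last time-step output fed through
    # two dense layers and dropout, then a single linear unit passed through ReLU to
    # produce the sales prediction.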


    cell = tf.contrib.rnn.LSTMCell(lstm_size, state_is_tuple=True, activation=lstm_activation,use_peepholes=True)

    val1, _ = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)

    val = tf.transpose(val1, [1, 0, 2])

    last = tf.gather(val, int(val.get_shape()[0]) - 1, name="last_lstm_output")

    # hidden layers (activations paired with their matching layer)
    hidden1 = tf.layers.dense(last, units=hidden1_nodes, activation=hidden1_activation)
    hidden2 = tf.layers.dense(hidden1, units=hidden2_nodes, activation=hidden2_activation)

    dropout = tf.layers.dropout(hidden2, rate=model_dropout_rate, training=True)

    weight = tf.Variable(tf.truncated_normal([hidden2_nodes, input_size]))
    bias = tf.Variable(tf.constant(0.1, shape=[input_size]))

    prediction = tf.nn.relu(tf.matmul(dropout, weight) + bias)

    return prediction


# saver = tf.train.Saver()
# saver.save(sess, "checkpoints_sales/sales_pred.ckpt")


@use_named_args(dimensions=dimensions)
def fitness(lstm_num_steps, size, lstm_hidden1_nodes, lstm_hidden2_nodes, lstm_init_epoch, lstm_max_epoch,
            lstm_learning_rate_decay, lstm_batch_size, lstm_dropout_rate, lstm_init_learning_rate,
            lstm_hidden1_activation, lstm_hidden2_activation, lstm_lstm_activation):
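    # Objective evaluated by gp_minimize: build and train the LSTM with the sampled
    # hyperparameters, log the validation metrics, keep the best run so far, and
    # return the validation RMSE (lower is better).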
    global bestValiPrediction, bestValiTrueVal, bestTestPrediction, bestTestTrueVal, iteration, lowest_error, num_steps, lstm_size, hidden1_nodes, hidden2_nodes, hidden1_activation, hidden2_activation, lstm_activation, init_epoch, max_epoch, learning_rate_decay, dropout_rate, init_learning_rate

    # num_steps = lstm_num_steps
    # lstm_size = size
    # batch_size = lstm_batch_size
    # learning_rate_decay = lstm_learning_rate_decay
    # init_epoch = lstm_init_epoch
    # max_epoch = lstm_max_epoch
    # hidden1_nodes = lstm_hidden1_nodes
    # hidden2_nodes = lstm_hidden2_nodes
    # dropout_rate = lstm_dropout_rate
    # init_learning_rate = lstm_init_learning_rate
    # hidden1_activation = lstm_hidden1_activation
    # hidden2_activation = lstm_hidden2_activation
    # lstm_activation = lstm_lstm_activation

    num_steps = np.int32(lstm_num_steps)
    lstm_size = np.int32(size)
    batch_size = np.int32(lstm_batch_size)
    learning_rate_decay = np.float32(lstm_learning_rate_decay)
    init_epoch = np.int32(lstm_init_epoch)
    max_epoch = np.int32(lstm_max_epoch)
    hidden1_nodes = np.int32(lstm_hidden1_nodes)
    hidden2_nodes = np.int32(lstm_hidden2_nodes)
    dropout_rate = np.float32(lstm_dropout_rate)
    init_learning_rate = np.float32(lstm_init_learning_rate)
    hidden1_activation = lstm_hidden1_activation
    hidden2_activation = lstm_hidden2_activation
    lstm_activation = lstm_lstm_activation

    # log_dir = log_dir_name(lstm_num_steps, size,lstm_hidden1_nodes,lstm_hidden2_nodes,lstm_learning_rate,lstm_init_epoch,lstm_max_epoch,
    #        lstm_learning_rate_decay,lstm_batch_size)

    train_X, train_y, test_X, test_y, val_X, val_y, nonescaled_test_y, nonescaled_val_y = pre_process()

    inputs = tf.placeholder(tf.float32, [None, num_steps, features], name="inputs")
    targets = tf.placeholder(tf.float32, [None, input_size], name="targets")
    model_learning_rate = tf.placeholder(tf.float32, None, name="learning_rate")
    model_dropout_rate = tf.placeholder_with_default(0.0, shape=())
    global_step = tf.Variable(0, trainable=False)

    prediction = setupRNN(inputs,model_dropout_rate)

    model_learning_rate = tf.train.exponential_decay(learning_rate=model_learning_rate, global_step=global_step, decay_rate=learning_rate_decay,
                                               decay_steps=init_epoch, staircase=False)

    with tf.name_scope('loss'):
        model_loss = tf.losses.mean_squared_error(targets, prediction)

    with tf.name_scope('adam_optimizer'):
        train_step = tf.train.AdamOptimizer(model_learning_rate).minimize(model_loss,global_step=global_step)

    # with tf.name_scope('accuracy'):
    #     correct_prediction = tf.sqrt(tf.losses.mean_squared_error(prediction, targets))
    #
    # accuracy = correct_prediction

    sess = tf.Session()

    sess.run(tf.global_variables_initializer())


    for epoch_step in range(max_epoch):


        for batch_X, batch_y in generate_batches(train_X, train_y, batch_size):
            train_data_feed = {
                inputs: batch_X,
                targets: batch_y,
                model_learning_rate: init_learning_rate,
                model_dropout_rate: dropout_rate
            }
            sess.run(train_step, train_data_feed)

    val_data_feed = {
        inputs: val_X,
    }
    vali_pred = sess.run(prediction, val_data_feed)

    vali_pred_vals = rescle(vali_pred)

    vali_pred_vals = np.array(vali_pred_vals)

    vali_pred_vals = (np.round(vali_pred_vals, 0)).astype(np.int32)

    vali_pred_vals = vali_pred_vals.flatten()

    vali_pred_vals = vali_pred_vals.tolist()

    vali_nonescaled_y = nonescaled_val_y.flatten()

    vali_nonescaled_y = vali_nonescaled_y.tolist()

    val_error = sqrt(mean_squared_error(vali_nonescaled_y, vali_pred_vals))

    val_mae = mean_absolute_error(vali_nonescaled_y, vali_pred_vals)
    val_mape = mean_absolute_percentage_error(vali_nonescaled_y, vali_pred_vals)
    val_rmspe = RMSPE(vali_nonescaled_y, vali_pred_vals)



    with open(Error_file_name, "a") as f:
        writer = csv.writer(f)
        writer.writerows(
            zip([fileName], [val_error], [val_mae], [val_mape],[val_rmspe]))

    if iteration == 0:
        lowest_error = val_error

        test_data_feed = {
            inputs: test_X,
        }
        test_pred = sess.run(prediction, test_data_feed)

        test_pred_vals = rescle(test_pred)

        test_pred_vals = np.array(test_pred_vals)

        test_pred_vals = (np.round(test_pred_vals, 0)).astype(np.int32)

        test_pred_vals = test_pred_vals.flatten()

        test_pred_vals = test_pred_vals.tolist()

        test_nonescaled_y = nonescaled_test_y.flatten()

        test_nonescaled_y = test_nonescaled_y.tolist()

        test_error = sqrt(mean_squared_error(test_nonescaled_y, test_pred_vals))
        test_mae = mean_absolute_error(test_nonescaled_y, test_pred_vals)
        test_mape = mean_absolute_percentage_error(test_nonescaled_y, test_pred_vals)
        test_rmspe = RMSPE(test_nonescaled_y, test_pred_vals)

        with open("best_withZero_addi_config.csv", "a") as f:
            writer = csv.writer(f)
            writer.writerows(
                zip([fileName], [num_steps], [lstm_size], [hidden2_nodes], [hidden2_activation], [hidden1_activation],
                    [hidden1_nodes], [lstm_activation], [init_epoch], [max_epoch], [learning_rate_decay],
                    [dropout_rate], [batch_size], [init_learning_rate], [val_error],[val_mae], [val_mape],[val_rmspe], [test_error],[test_mae],[test_mape],[test_rmspe]))


    elif val_error < lowest_error:
        # Save the new model to harddisk.
        saver = tf.train.Saver()
        saver.save(sess, "checkpoints_sales/sales_pred.ckpt")

        test_data_feed = {
            inputs: test_X,
        }
        test_pred = sess.run(prediction, test_data_feed)

        test_pred_vals = rescle(test_pred)

        test_pred_vals = np.array(test_pred_vals)

        test_pred_vals = (np.round(test_pred_vals, 0)).astype(np.int32)

        test_pred_vals = test_pred_vals.flatten()

        test_pred_vals = test_pred_vals.tolist()

        test_nonescaled_y = nonescaled_test_y.flatten()

        test_nonescaled_y = test_nonescaled_y.tolist()

        bestValiPrediction = vali_pred_vals
        bestValiTrueVal = vali_nonescaled_y
        bestTestPrediction = test_pred_vals
        bestTestTrueVal =  test_nonescaled_y

        test_error = sqrt(mean_squared_error(test_nonescaled_y, test_pred_vals))

        test_mae = mean_absolute_error(test_nonescaled_y, test_pred_vals)
        test_mape = mean_absolute_percentage_error(test_nonescaled_y, test_pred_vals)
        test_rmspe = RMSPE(test_nonescaled_y, test_pred_vals)

        with open("best_withZero_addi_config.csv", "a") as f:
            writer = csv.writer(f)
            writer.writerows(
                zip([fileName], [num_steps], [lstm_size], [hidden2_nodes], [hidden2_activation],
                    [hidden1_activation],
                    [hidden1_nodes], [lstm_activation], [init_epoch], [max_epoch], [learning_rate_decay],
                    [dropout_rate], [batch_size], [init_learning_rate], [val_error],[val_mae], [val_mape],[val_rmspe], [test_error],[test_mae],[test_mape],[test_rmspe]))

        # Update the lowest validation error seen so far.
        lowest_error = val_error

    # Close the session and reset the default graph, otherwise each call to this
    # function keeps adding new ops to the same TensorFlow graph.
    sess.close()
    tf.reset_default_graph()

    # NOTE: scikit-optimize performs minimization, so the validation RMSE is
    # returned directly -- the search looks for the hyperparameters with the
    # lowest error.

    iteration += 1
    return val_error


if __name__ == '__main__':

    start = time()

    for i in range(len(fileNames)):
        iteration = 0
        lowest_error = 0.0

        fileName = '{}{}{}'.format('/home/wso2/suleka/salesPred/', fileNames[i],'.csv')
        Error_file_name = '{}{}{}'.format('all_validation_errors/errors_', fileNames[i], '.csv')
        vali_data = '{}{}{}'.format('validation_data/vali__data_', fileNames[i], '.csv')
        predic_data = '{}{}{}'.format('prediction_data/predic__data_', fileNames[i], '.csv')
        Skopt_object_name = '{}{}{}'.format('/home/wso2/suleka/salesPred/skopt_objects/object_', fileNames[i], '.gz')


        column_min_max = column_min_max_all[i]

        # Bayesian optimization using Gaussian Processes.
        # acq_func -> https://arxiv.org/pdf/1807.02811.pdf

        search_result = gp_minimize(func=fitness,
                                    dimensions=dimensions,
                                    acq_func='EI',  # Expected Improvement.
                                    n_calls=300,
                                    x0=default_parameters,
                                    random_state=randomState)

        with open(vali_data, "w") as f:
            writer = csv.writer(f)
            writer.writerows(zip(bestValiTrueVal, bestValiPrediction))

        with open(predic_data, "w") as f:
            writer = csv.writer(f)
            writer.writerows(zip(bestTestTrueVal, bestTestPrediction, search_result.x))

        bestTestPrediction = None
        bestValiPrediction = None
        bestTestTrueVal = None
        bestValiTrueVal = None

        dump(search_result, Skopt_object_name, store_objective=True)


        # print()

        # plot()

    atexit.register(endlog)
    log("Start Program")

Tags: python-3.x, tensorflow, deep-learning, lstm, random-seed

Solution

