How to fix the error "Could not determine runtime" when performing hyperparameter optimization through the Experiment modeler in IBM Watson

Problem description

I am building a time-series forecasting model with an LSTM in Keras. For model training, I am trying to obtain the best hyperparameter combination via HPO in the Experiment modeler, which I found through the article below:

https://medium.com/ibm-watson/automating-and-accelerating-hyperparameter-tuning-for-deep-learning-52184944eeb4

After several attempts I got the training to run, but I keep getting the error message "Could not determine the runtime of the training run". I checked the logs and don't see any errors there. Below is the code I am using; according to the logs it runs successfully all the way to where "Mean absolute error" is printed.

Besides this code, I also provide the input file "Global_temperatures_final.csv" in the bucket where the training data is stored, and I create an experiment that supplies the hyperparameters by adding them manually in the training definition.
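
For reference, the config.json that the script reads would look something like the snippet below. The values are made up for illustration; in an actual HPO run (as I understand the article above) the service generates this file with the values it picks for each sub-training:

{
    "Timesteps": 12,
    "Ep": 50,
    "Neurons": 32,
    "BatchSizes": 4
}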

import keras
import pandas as pd
import numpy as np
import json
import os
import os.path
from os import environ
from keras.callbacks import TensorBoard
from emetrics import EMetrics
import h5py
from keras.layers import Dense, Input, LSTM
from keras.models import Model
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
# input data
if __name__=="__main__":
    input_data_folder=os.environ["DATA_DIR"]
    Global_Temp=pd.read_csv(input_data_folder+'/Global_temperature_final.csv')
# file containing hyperparameters
config_file="config.json"
if os.path.exists(config_file):
    with open(config_file,'r') as f:
        json_obj=json.load(f)
    Timesteps=json_obj["Timesteps"] 
    Ep=json_obj["Ep"]
    Neurons=json_obj["Neurons"] 
    BatchSizes=json_obj["BatchSizes"]
else:
    Timesteps=1
    Ep=1
    Neurons=1 
    BatchSizes=2
model_filename = "lstm_gtemp.h5"
# writing the train model and getting input data
if environ.get('RESULT_DIR') is not None:
    output_model_folder = os.path.join(os.environ["RESULT_DIR"], "model")
    output_model_path = os.path.join(output_model_folder, model_filename)
else:
    output_model_folder = "model"
    output_model_path = os.path.join("model", model_filename)

os.makedirs(output_model_folder, exist_ok=True)

#writing metrics
if environ.get('JOB_STATE_DIR') is not None:
    tb_directory = os.path.join(os.environ["JOB_STATE_DIR"], "logs", "tb", "test")
else:
    tb_directory = os.path.join("logs", "tb", "test")

os.makedirs(tb_directory, exist_ok=True)
tensorboard = TensorBoard(log_dir=tb_directory)
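# SUBID appears to be set by the WML experiment runner to identify the current
# HPO sub-training; EMetrics.open() (from the emetrics helper that accompanies
# the article above) uses it so that metrics are attributed to the right run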
def getCurrentSubID():
    if "SUBID" in os.environ:
        return os.environ["SUBID"]
    else:
        return None

class HPOMetrics(keras.callbacks.Callback):
    def __init__(self):
        self.emetrics = EMetrics.open(getCurrentSubID())

    def on_epoch_end(self, epoch, logs={}):
        train_results = {}
        test_results = {}

        for key, value in logs.items():
            if 'val_' in key:
                test_results.update({key: value})
            else:
                train_results.update({key: value})

        print('EPOCH ' + str(epoch))
        self.emetrics.record("train", epoch, train_results)
        self.emetrics.record(EMetrics.TEST_GROUP, epoch, test_results)

    def close(self):
        self.emetrics.close()
Global_Temp['Date']=pd.to_datetime(Global_Temp['Date'])
Global_Temp.set_index('Date',drop=True,inplace=True)
Training_data,Testing_data=train_test_split(Global_Temp,test_size=0.1,shuffle=False)
def Data_preparation(Data,BatchSize,Timesteps):
    # For the stateful LSTM, (length of the data - 2*Timesteps) must be exactly
    # divisible by the batch size, so that the whole data set can be split into
    # complete batches for training the model
    length=int((len(Data)-2*Timesteps)/BatchSize)*BatchSize
    # Clip the data set accordingly
    Data=Data[:length+Timesteps*2]
    # reshape the data for feature scaling
    Data_1=Data['Monthly_Temperature'].values.reshape(-1,1)
    # Feature scaling
    Scaling=MinMaxScaler(feature_range=(0,1))
    Data_scaled=Scaling.fit_transform(np.float64(Data_1))
    # Create x,y data for LSTM training or prediction
    x=[]
    y=[]
    for i in range(Timesteps,length+Timesteps):
        x.append(Data_scaled[i-Timesteps:i,0])
        y.append(Data_scaled[i:i+Timesteps,0])
    # Reshape for the LSTM to dimensions [length, Timesteps, 1]
    # by creating strips of the time series of length Timesteps
    x, y = np.array(x), np.array(y)
    x=np.reshape(x,(x.shape[0],x.shape[1],1))
    y=np.reshape(y,(y.shape[0],y.shape[1],1))
    return x,y
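# Worked example of the clipping above: with len(Data)=100, Timesteps=2 and
# BatchSize=4, length = int((100 - 2*2)/4)*4 = 96, the frame is clipped to
# its first 100 rows, and the loop over range(2, 98) yields 96 (x, y) windows,
# which is exactly divisible by the batch size of 4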
x_train,y_train=Data_preparation(Training_data,BatchSize=BatchSizes,Timesteps=Timesteps)
x_test,y_test=Data_preparation(Testing_data,BatchSize=BatchSizes,Timesteps=Timesteps)
input_layer=Input(batch_shape=(BatchSizes,Timesteps,1))
lstm_layer=LSTM(Neurons,stateful=True,return_sequences=True)(input_layer)
output_layer=Dense(units=1)(lstm_layer)
model=Model(inputs=input_layer,outputs=output_layer)
model.summary()
model.compile(optimizer='adam',loss='mae')

hpo = HPOMetrics()

# Because the LSTM network is stateful, the internal states are reset manually after each epoch
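# Note: each fit() call below runs with epochs=1, so on_epoch_end always
# receives epoch=0 and HPOMetrics records every outer iteration under index 0.
# Passing epochs=e+1 together with initial_epoch=e would keep the epoch counter
# increasing across this manual reset loop (a possible tweak, not verified
# against the HPO runner)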
for e in range(Ep):
    model.fit(x_train,y_train,shuffle=False,epochs=1,batch_size=BatchSizes,verbose=0,callbacks=[tensorboard, hpo])
    model.reset_states()
hpo.close()
def Model_Forecast(x,model,BatchSize):
    yhat=model.predict(x,batch_size=BatchSize)
    return yhat
def Model_evaluation(y,yhat):
    # flatten the [samples, Timesteps, 1] arrays to [samples*Timesteps, 1] before computing the error
    MAE=mean_absolute_error(np.reshape(y,(y.shape[0]*y.shape[1],1)),np.reshape(yhat,(yhat.shape[0]*yhat.shape[1],1)))
    return MAE
y_pred=Model_Forecast(x_test,model=model,BatchSize=BatchSizes)
MAE_score=Model_evaluation(y_test,y_pred)
print('Mean absolute error ',MAE_score)
# save the model
model.save(output_model_path)
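
For what it's worth, a quick way to double-check that the artifact written to output_model_path is readable is the standard Keras loader (a minimal sketch, run after training finishes):

from keras.models import load_model

# reload the file that was just saved, to confirm it is a valid HDF5 model
reloaded = load_model(output_model_path)
reloaded.summary()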

Tags: python, optimization, ibm-watson

Solution

