首页 > 解决方案 > 如何使用 tensorflow 让 lstm/rnn 更多地关注时间序列的某些部分,而更少关注其他部分?

问题描述

我有一个时间序列预测问题,其中大多数观察值(95%)是 0,而其余值是非零。我如何利用 RNN 来解决这个问题。

我想根据环境数据(气温、降雨量、湿度等)预测地表流量。我们知道一年中大部分时间地表流量为 0.0。但是,我也不想简单地忽略 0,因为 0 代表一年中地表流量为 0.0 的时期。下图显示了可能观察到的输出和三个输入。这里的三个输入只是随机的,但实际上它们将是降雨、湿度等数据,并且这些输入数据具有一些周期性模式。

在此处输入图像描述

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import psutil
import tensorflow as tf
import sys
print(sys.version)
print('tensorflow version: ', tf.__version__)


#clean computation graph
tf.reset_default_graph()

tf.set_random_seed(777)  # reproducibility
np.random.seed(0)

def MinMaxScaler(data):    
    numerator = data - np.min(data, 0)
    denominator = np.max(data, 0) - np.min(data, 0)
    # noise term prevents the zero division
    return numerator / (denominator + 1e-7)

class generate_data(object):

    def __init__(self, data_len,  in_series, y_pred, seq_lengths, method='sum' ):
        self.data_len = data_len
        self.data = None
        self.in_series = in_series #number of input series
        self.y_pred = y_pred  #number of final outputs from model
        self.seq_lengths = seq_lengths
        self.method = method

    def _f(self, x):
        y = 0
        result = []
        for _ in x:
            result.append(y)
            y += np.random.normal(scale=1)
        return np.array(result)

    def _runningMean(self, x, N):
        return np.convolve(x, np.ones((N,))/N)[(N-1):]


    def sine(self):
        DATA = np.zeros((self.data_len, self.in_series))
        xx = [None]
        data_0 = np.sin(np.linspace(0, 20, self.data_len*self.in_series))
        xx = data_0.reshape(self.data_len, self.in_series)
        DATA[:,0: self.in_series] = xx            
        y = self._get_y(DATA)
        return xx,y, DATA


    def _get_y(self, xx):
        if self.method=='sum':
            yy = np.array([np.sum(xx[i,:]) for i in range(np.shape(xx)[0])])
        elif self.method == 'mean':
              yy = np.array([np.mean(xx[i,:]) for i in range(np.shape(xx)[0])])

        elif self.method == 'self_mul':
            yy = np.array([np.prod(xx[i,:]) for i in range(np.shape(xx)[0])])
        elif self.method == 'mean_mirror':
            yy = np.array([np.mean(xx[i,:]) for i in range(np.shape(xx)[0])])
        return yy


    def normalize(self, xx1,yy1):

        yy = [None]*len(yy1)
        YMinMax = {}

        xx = MinMaxScaler(xx1)
        for i in range(self.y_pred):
            YMinMax['ymin_' + str(i)] = np.min(yy1[0])
            YMinMax['ymax_' + str(i)] = np.max(yy1[0])
            yy[i] = MinMaxScaler(yy1[0])
        setattr(self, 'YMinMax', YMinMax)
        return xx,yy


    def create_dataset(self, xx, yy, percent_of_zeros):
        '''creates a dataset consisting of windows for x and y data'''
        dataX = self._build_input_windows(xx, self.seq_lengths)

        if self.y_pred > 1:
            pass
        elif self.y_pred > 1 and self.seq_lengths != any(self.seq_lengths):
            pass
        else: 
            dataY = self._build_y_windows(yy[0] , self.seq_lengths)   

        indices = np.random.choice(np.arange(dataY.size), replace=False,
                           size=int(dataY.size * percent_of_zeros))
        dataY[indices] = 0

        return dataX, dataY


    def _build_input_windows(self, time_series, seq_length):
        dataX = []
        for i in range(0, len(time_series) - seq_length):
            _x = time_series[i:i + seq_length, :]
            dataX.append(_x)
        return np.array(dataX)   


    def _build_y_windows(self, iny, seq_length):        
        dataY = []
        for i in range(0, len(iny) - seq_length):
            _y = iny[i + seq_length, ]  # Next close price           
            dataY.append(_y)
        return  np.array(dataY)


    def TrainTestSplit(self, dataX, dataY, train_frac):

        train_size = int(len(dataY) * train_frac)            
        trainX, testX = np.array(dataX[0:train_size]), np.array(dataX[train_size:len(dataX)])

        trainY, testY = np.array(dataY[0:train_size]), np.array(dataY[train_size:len(dataY)])
        trainY = trainY.reshape(len(trainY), 1)
        testY = testY.reshape(len(testY), 1)  
        return trainX, trainY, testX, testY, train_size

#training/hyper parameters
tot_epochs = 500 
batch_size = 16
learning_rate = 0.01
seq_lengths = 5  #sequence lengths/window size for  RNN
rnn_inputs = 3 # no of inputs for  RNN
y_pred = 1
data_length = 1005  #this can be overwritten or useless
gen_data = generate_data(data_length,  rnn_inputs, y_pred, seq_lengths, 'sum')
xx,yy,data_1 = gen_data.sine() 
# xx = abs(xx)
train_frac = 0.8
xx1,yy1 = gen_data.normalize(xx,[yy])
zeros = 0.96
dataX, dataY = gen_data.create_dataset(xx1,yy1, zeros)
trainX, trainY, testX, testY, train_size = gen_data.TrainTestSplit( dataX, dataY, train_frac)

keep_prob = tf.placeholder(tf.float32)
x_placeholders = tf.placeholder(tf.float32, [None, 5, 3])
Y =  tf.placeholder(tf.float32, [None, 1])

plt.plot(dataY, '.', label='output')
plt.plot(xx[:,0], '.', label='input1')
plt.plot(xx[:,1], '.', label='input2')
plt.plot(xx[:,2], '.', label='input3')
plt.legend()

# build neural network
with tf.variable_scope('scope0'):  #defining  RNN
    # cell = tf.contrib.rnn.BasicLSTMCell(num_units= 7, state_is_tuple=True, activation=tf.tanh)
    cell = tf.keras.layers.LSTMCell(units = 128)
    outputs1, _states = tf.nn.dynamic_rnn(cell, x_placeholders, dtype=tf.float32)
    # Y_pred1 = tf.contrib.layers.fully_connected(outputs1[:, -1], 1, activation_fn=None)
    Y_pred1 = tf.keras.layers.Dense(1)(outputs1[:,-1])
Y_pred = Y_pred1

## cost/loss
loss = tf.reduce_sum(tf.square(Y_pred - Y))  # sum of the squares
## optimizer
optimizer = tf.train.AdamOptimizer(learning_rate)
train = optimizer.minimize(loss)
#
## RMSE
targets = tf.placeholder(tf.float32, [None, 1])
predictions = tf.placeholder(tf.float32, [None, 1])
rmse = tf.sqrt(tf.reduce_mean(tf.square(targets - predictions)))


with tf.Session() as sess:
    saver = tf.train.Saver(max_to_keep=41)
    writer = tf.summary.FileWriter('./laos_2out/cnntest', sess.graph)

    init = tf.global_variables_initializer()
    sess.run(init)

    # Training step
    for epoch in range(tot_epochs):

      total_batches = int(train_size / batch_size)  ##total batches/ no. of steps in an epoch

      #for batch in range(total_batches):
      _, step_loss = sess.run([train, loss], feed_dict= {x_placeholders:trainX, Y:trainY, keep_prob:0.5} )
      print('epoch: # {} loss: {}'.format(epoch, step_loss))

#    # evaluating on test data
    test_predict = sess.run(Y_pred, feed_dict= {x_placeholders:testX, Y:trainY, keep_prob:0.5} )

    #evaluating on training data
    train_predict = sess.run(Y_pred, feed_dict={x_placeholders:trainX, Y:trainY, keep_prob:0.5})

    rmse_val = sess.run(rmse, feed_dict={targets: testY, predictions: test_predict})
    print("RMSE: {}".format(rmse_val))

# Plot predictions
  fig, (ax1,ax2) = plt.subplots(1,2, sharey=True)
  fig.set_figwidth(14)
  fig.set_figheight(5)
  ax2.plot(testY, 'b', label='observed')
  ax2.plot(test_predict, 'k', label='predicted')
  ax2.legend(loc="best")
  ax2.set_xlabel("Time Period")
  ax2.set_title('Testing')
  ax1.plot(trainY, 'b', label='observed')
  ax1.plot(train_predict, 'k',label= 'predicted')
  ax1.legend(loc="best")
  ax1.set_xlabel("Time Period")
  ax1.set_ylabel("discharge (cms)")
  ax1.set_title('Training')
  plt.show()

问题是,在训练时,模型关注大多数值,即 0,因此预测等于 0。如何使模型专注于非零值(正表面流),同时还考虑 0(当没有表面流时)。我已经阅读了注意力机制,但不明白如何在这种情况下实现它。

标签: tensorflowdeep-learninglstmrecurrent-neural-networkattention-model

解决方案


推荐阅读