I don't know why tf.estimator.Estimator.predict predicts different classes depending on the size of test_data.

# Read test-8.png and test-9.png and stack them into a single array.
test_data = np.asarray([
    mpimg.imread(FLAGS.input + 'test-' + str(idx) + '.png')
    for idx in range(8, 10)
])
# Keep the first three entries of the last axis (presumably dropping an
# alpha channel — TODO confirm) and cast to float32.
test_data = test_data[:, :, :, :3].astype(np.float32)

# Estimator that loads the model stored under FLAGS.modelPath.
classifier = tf.estimator.Estimator(
    model_fn=custom_model_fn,
    model_dir=FLAGS.modelPath,
)
# Single unshuffled pass over the test images, fed under the 'x' feature key.
pred_input = tf.estimator.inputs.numpy_input_fn(
    x={'x': test_data},
    shuffle=False,
    num_epochs=1,
)

With the code above, test_data consists of test-8.png and test-9.png, whose labels are 8 and 9 respectively.

However, the model predicts labels 3 and 9 for these two images.

But when I use test_data of size 3,

# Add test-7.png, test-8.png and test-9.png to test_data.
test_data.extend(
    mpimg.imread(FLAGS.input + 'test-' + str(idx) + '.png')
    for idx in range(7, 10)
)

test_data grows to size 3 and consists of test-7.png, test-8.png and test-9.png, whose labels are 7, 8 and 9 respectively.

This time the model predicts labels 7, 8 and 9 for them, so all three predictions are correct.

I wonder why the predicted class of test-8.png differs between the first and the second snippet. I also cannot understand why the size of test_data affects the actual prediction.

Perhaps there is an error in the code; I would appreciate it if you could tell me what it is.

if __name__ == '__main__':
   import argparse
   import scipy.io
   import matplotlib.image as mpimg

   if FLAGS.isTrain:
      # Load the .mat dataset: 'X' holds the images and 'y' the labels.
      mat_data = scipy.io.loadmat(FLAGS.inputMat)
      data = mat_data['X']
      labels = mat_data['y']
      # Collapse the first three axes into one feature axis, then transpose
      # so that each row is one sample (samples are indexed by the last axis of X).
      data = data.reshape(data.shape[0] * data.shape[1] * data.shape[2], data.shape[3]).T
      # Scale by 1/255 (presumably maps 8-bit pixel values into [0, 1] — TODO confirm).
      data = data/np.float32(255)
      # Flatten the label column and fold it into 0..9 (a label of 10 becomes 0).
      labels = labels.reshape(labels.shape[0]) % 10

      # Use at most the first 500000 samples for training.
      train_data = data[:500000]
      train_labels = make_onehot_vector(labels[:500000].astype(np.float32))


      classifier = tf.estimator.Estimator(custom_model_fn, model_dir = FLAGS.modelPath)
      tensors_to_log = {"probabilities" : "softmax_tensor"}
      # NOTE(review): logging_hook is created but not passed to anything in the
      # visible code; hand it to TrainSpec(hooks=[...]) if logging is wanted.
      logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log, every_n_iter=2000)

      train_input = tf.estimator.inputs.numpy_input_fn( x = {"x":train_data}, y = train_labels, batch_size = FLAGS.batch_size, num_epochs=FLAGS.num_epoch, shuffle = True)
      # Fixed typos: the class is TrainSpec and its keyword is max_steps.
      # NOTE(review): FLAGS.num_stpes is kept as spelled because the flag
      # definition is outside this view — confirm the flag name.
      train_spec = tf.estimator.TrainSpec(input_fn=train_input, max_steps=FLAGS.num_stpes)


      # NOTE(review): eval_spec is not defined anywhere in the visible code; a
      # tf.estimator.EvalSpec must be built before this call.
      tf.estimator.train_and_evaluate(classifier, train_spec, eval_spec)
      classifier.export_savedmodel(FLAGS.modelPath, serving_input_receiver_fn=serving_input_receiver_fn)

      # Load test-8.png and test-9.png, keep the first three entries of the
      # last axis, and flatten each image to a 32*32*3 vector.
      test_data = list()
      for i in range(8, 10):
          test_data.append(mpimg.imread(FLAGS.input + 'test-' + str(i) + '.png'))
      test_data = np.asarray(test_data)
      test_data = test_data[:, :, :, :3].astype(np.float32)
      test_data = test_data.reshape(-1, 32*32*3)

      # Re-create the estimator from the model directory and run one
      # unshuffled prediction pass over the test images.
      classifier = tf.estimator.Estimator(model_fn = custom_model_fn, model_dir = FLAGS.modelPath)
      # Fixed NameError: the array is named test_data, not test_Data.
      pred_input = tf.estimator.inputs.numpy_input_fn(x = {"x": test_data}, y = None, shuffle=False, num_epochs = 1)
      pred_result = classifier.predict(input_fn = pred_input)
      pred_list = list(pred_result)

def custom_model_fn(features, labels, mode):
    """Model function for tf.estimator.Estimator.

    Reshapes features['x'] to [-1, 32, 32, 3], runs it through the
    convolutional and dense layers, and returns the EstimatorSpec matching
    `mode`.

    Args:
        features: dict holding the input tensor under the key 'x'.
        labels: label tensor fed to softmax cross-entropy and arg-maxed along
            axis 1 for the accuracy metric (presumably one-hot — TODO
            confirm); not used in PREDICT mode.
        mode: a tf.estimator.ModeKeys value.

    Returns:
        A tf.estimator.EstimatorSpec with predictions (PREDICT), loss and
        train_op (TRAIN), or loss and eval metrics (any other mode).
    """
    input_layer = tf.reshape(features['x'], [-1, 32, 32, 3])
    isTrain = (mode == tf.estimator.ModeKeys.TRAIN)
    L1 = cnn(input_layer, 32, [5,5], [2,2], 2, phase=isTrain)
    # NOTE(review): L4 is not defined in the visible code; the layers between
    # L1 and L5 appear to have been omitted and must be restored.
    L5 = cnn(L4, 196, [5,5], [2,2], 2, phase=isTrain)

    L5_flat = tf.reshape(L5, [-1, 196 * 3 * 3])
    L6 = dense_batch_relu(L5_flat, isTrain, 1024, 'L6')
    logits = tf.layers.dense(inputs=L6, units = 10, activation=None)
    predictions = {"classes": tf.argmax(input=logits, axis=1), "probabilities": tf.nn.softmax(logits, name="softmax_tensor")}

    if mode == tf.estimator.ModeKeys.PREDICT:
        export_outputs = {"predict_output": tf.estimator.export.PredictOutput(predictions)}
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions, export_outputs=export_outputs)

    # Mean cross-entropy plus whatever regularization losses the layers registered.
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels)) + tf.losses.get_regularization_loss()

    if mode == tf.estimator.ModeKeys.TRAIN:
        # tf.layers.batch_normalization places its moving mean/variance
        # updates in the UPDATE_OPS collection. They only run if the train op
        # depends on them; otherwise inference (training=False) uses moving
        # statistics that were never updated.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            optimizer = tf.train.AdamOptimizer(5e-4)
            train_op = optimizer.minimize(loss=loss, global_step = tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

    # Remaining mode: report accuracy against the arg-max of the labels.
    Y = tf.argmax(labels, 1)
    eval_metric_ops = {"acc" : tf.metrics.accuracy(labels=Y, predictions=predictions['classes'])}
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)
def serving_input_receiver_fn():
    """Build the ServingInputReceiver for model export.

    Exposes one float32 placeholder under the key 'x' with shape
    [None, 32*32*3]; the same dict is used as both the features and the
    receiver tensors.

    Returns:
        A tf.estimator.export.ServingInputReceiver.
    """
    inputs = {
        'x': tf.placeholder(tf.float32, [None, 32*32*3]),
    }  # closing brace was missing, which made the function a syntax error
    return tf.estimator.export.ServingInputReceiver(inputs, inputs)

In this code, why do the predictions become less accurate as the test data gets smaller, and more accurate as it gets larger?

def dense_batch_relu(x, phase, unit, scope, dropout_rate=0.3):
    """Apply dense -> batch normalization -> dropout -> ReLU under `scope`.

    Args:
        x: input tensor for the dense layer.
        phase: passed as `training` to batch normalization and dropout.
        unit: number of output units of the dense layer.
        scope: variable scope name wrapping all ops created here.
        dropout_rate: rate handed to tf.layers.dropout.

    Returns:
        The ReLU-activated output tensor.
    """
    with tf.variable_scope(scope):
        # L2 penalty (scale 5e-3) on the dense kernel.
        kernel_reg = tf.contrib.layers.l2_regularizer(scale=5e-3)
        dense_out = tf.layers.dense(
            x,
            unit,
            activation=None,
            kernel_regularizer=kernel_reg,
        )
        normalized = tf.layers.batch_normalization(
            inputs=dense_out,
            training=phase,
        )
        dropped = tf.layers.dropout(normalized, dropout_rate, training=phase)
        activated = tf.nn.relu(dropped, 'relu')

    return activated



if mode == tf.estimator ModeKeys.TRAIN:
   update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
   with tf.control_dependencies(update_ops):
       optimizer = tf.train.AdamOptimizer(5e-4)
       train_op = optimizer.minimize(loss=loss, global_step = tf.train.get_global_step())
       return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

