Neural network fits, but the result gives the wrong number of columns

Problem description

I want to learn how to fit neural network models in Stan using R. I am following the example on this site: http://srmart.in/neural-networks-in-stan-or-how-i-was-utterly-surprised-that-it-worked-at-all/

My first step was to literally copy and paste the code needed to fit the classification model and run it. It failed immediately, and I do not know why.

When I run the model, it gives me output. But when I try to read in the posterior samples to get predictions for my validation set, I get the following warning message:

Warning message:
In matrix(., N_test, 3, byrow = TRUE) :
  data length [151] is not a sub-multiple or multiple of the number of rows [50]
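
A quick way to see what those 151 values are is to inspect the row names of the summary matrix (a minimal check, run after the fit_nuts call below):

rn <- rownames(summary(fit_nuts)$summary)
length(rn)   # 151: the 50 x 3 = 150 elements of output_test, plus one more
tail(rn, 1)  # "lp__": rstan always appends the log posterior to the summary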

R code:

library(rstan)
library(magrittr)

sm <- stan_model("./stan_model.stan")

fit_nn_cat <- function(x_train, y_train, x_test, y_test, H, n_H, method = "optimizing", ...) {
  stan_data <- list(
    N = nrow(x_train),
    P = ncol(x_train),
    x = x_train,
    labels = y_train,
    H = H,
    n_H = n_H,
    N_test = length(y_test),
    x_test = x_test  # required by the Stan data block (matrix[N_test, P] x_test)
  )
  if(method == "optimizing") {
    optOut <- optimizing(sm, data = stan_data)
    test_char <- paste0("output_test[",1:length(y_test), ",",rep(1:max(y_train), each = length(y_test)),"]") 
    y_test_pred <- matrix(optOut$par[test_char], stan_data$N_test, max(y_train))
    y_test_cat <- apply(y_test_pred, 1, which.max)
    out <- list(y_test_pred = y_test_pred,
                y_test_cat = y_test_cat,
                conf = table(y_test_cat, y_test),
                fit = optOut)
    return(out)
  } else if(method == "sampling") {
    out <- sampling(sm, data = stan_data, pars = "output_test", ...)
    return(out)
  } 
}

data(iris)
x <- iris[,1:4]
y <- as.numeric(as.factor(iris[, "Species"]))

N_test <- 50
test_indices <- sample(1:nrow(x), N_test)
x_train <- x[-test_indices,]
y_train <- y[-test_indices]
x_test <- x[test_indices,]
y_test <- y[test_indices]

fit_nuts <- fit_nn_cat(x_train, y_train, x_test, y_test, 2, 50, method = "sampling", cores = 4, iter = 1000)

cat_nuts <- summary(fit_nuts)$summary[,"mean"] %>%
  matrix(N_test, 3, byrow = TRUE) %>%
  apply(1, which.max)
table(cat_nuts, y_test)

The second-to-last statement (the matrix() call in the pipeline) is where the error occurs.

Stan code:

functions {
  vector[] nn_predict(matrix x, matrix d_t_h, matrix[] h_t_h, matrix h_t_d, row_vector[] hidden_bias, row_vector y_bias) {
    int N = rows(x);
    int n_H = cols(d_t_h);
    int H = size(hidden_bias);
    int num_labels = cols(y_bias) + 1;
    matrix[N, n_H] hidden_layers[H];
    vector[num_labels] output_layer_logit[N];
    vector[N] ones = rep_vector(1., N);

    hidden_layers[1] = inv_logit(x * d_t_h + ones * hidden_bias[1]);
    for(h in 2:H) {
      hidden_layers[h] = inv_logit(hidden_layers[h-1] * h_t_h[h - 1] + ones * hidden_bias[h]);
    }
    for(n in 1:N) {
      output_layer_logit[n, 1] = 0.0;
      output_layer_logit[n, 2:num_labels] = (hidden_layers[H, n] * h_t_d + y_bias)';
    }
    return(output_layer_logit);
  }
}

data {
  int N; // Number of training samples
  int P; // Number of predictors (features)
  matrix[N, P] x; // Feature data
  int labels[N]; // Outcome labels
  int H; // Number of hidden layers
  int n_H; // Number of nodes per layer (All get the same)

  int N_test; // Number of test samples
  matrix[N_test, P] x_test; // Test predictors
}

transformed data {
  int num_labels = max(labels); // How many labels are there
}

parameters {
  matrix[P, n_H] data_to_hidden_weights; // Data -> Hidden 1
  matrix[n_H, n_H] hidden_to_hidden_weights[H - 1]; // Hidden[t] -> Hidden[t+1]
  matrix[n_H, num_labels - 1] hidden_to_data_weights; // Hidden[T] -> Labels. Base class gets 0.
  // ordered[n_H] hidden_bias[H]; // Use ordered if using NUTS
  row_vector[n_H] hidden_bias[H]; // Hidden layer biases
  row_vector[num_labels - 1] labels_bias; // Labels biases. Base class gets 0.
}

transformed parameters {
  vector[num_labels] output_layer_logit[N]; // Predicted output layer logits

  output_layer_logit = nn_predict(x,
                                  data_to_hidden_weights,
                                  hidden_to_hidden_weights,
                                  hidden_to_data_weights,
                                  hidden_bias,
                                  labels_bias);

}

model {
  // Priors
  to_vector(data_to_hidden_weights) ~ std_normal();

  for(h in 1:(H-1)) {
    to_vector(hidden_to_hidden_weights[h]) ~ std_normal();
  }

  to_vector(hidden_to_data_weights) ~ std_normal();

  for(h in 1:H) {
    to_vector(hidden_bias[h]) ~ std_normal();
  }
  labels_bias ~ std_normal();

  for(n in 1:N) { // Likelihood
    labels[n] ~ categorical_logit(output_layer_logit[n]);
  }
}

generated quantities {
  vector[num_labels] output_layer_logit_test[N_test] = nn_predict(x_test,
                               data_to_hidden_weights,
                               hidden_to_hidden_weights,
                               hidden_to_data_weights,
                               hidden_bias,
                               labels_bias);
  matrix[N_test, num_labels] output_test;
  for(n in 1:N_test) {
    output_test[n] = softmax(output_layer_logit_test[n])';
  }
}

At first I thought something was wrong with the parameter initialization. But I am new to Stan, and I may not fully understand what is going on. In the final for loop, I expect the output_test variable to get N_test columns, and N_test = 50. Yet in the returned model, output_test has 151 columns.
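
One way to check the shape of output_test itself, independent of the summary, is to pull the raw posterior draws (a sketch using rstan::extract):

draws <- rstan::extract(fit_nuts, pars = "output_test")$output_test
dim(draws)  # iterations x N_test x num_labels, e.g. 2000 x 50 x 3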

What is going wrong here?

Tags: r, neural-network, stan

Solution
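
The mismatch comes from the summary, not from the Stan program. summary(fit_nuts)$summary has one row per monitored quantity: the 50 x 3 = 150 elements of output_test, plus one extra row for the log posterior lp__, which rstan always keeps, for a total of 151 rows. The pipeline then tries to fold those 151 means into a 50-row matrix, hence the warning; the "151 columns" are really 151 summary rows. Restricting the summary to output_test excludes lp__. A sketch of the corrected extraction follows; note that rstan reports the elements of a matrix in column-major order, so byrow = TRUE should arguably be dropped as well:

post_mean <- summary(fit_nuts, pars = "output_test")$summary[, "mean"]
y_prob <- matrix(post_mean, nrow = N_test, ncol = 3)  # column-major, matching Stan's output order
cat_nuts <- apply(y_prob, 1, which.max)
table(cat_nuts, y_test)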

