r - 数据帧内多个组的平滑预测
问题描述
我正在尝试使用非线性回归(NLR)函数来预测值(y)随时间(x)的变化,然后计算预测达到最大值(最佳)的时间。我得到了关于实际测量值 (y) 的预测,这很好,但是这些预测锚定在 x 值上,这意味着我只能得到某些增量的预测值。这可以在下图中看到。
这意味着计算出的最佳值将始终处于 x 值之一,但我正在使用此 NLR 函数来获得 y 最佳时间的数学合理估计。
我不知道问题是否出在我获取这些值的方法中,但这里有一个示例:
# Example data in dput() form: a 46-row tibble with two factor columns
# (`measure`, `sample`) and two numeric columns (`y` = measured value,
# `x` = time). Only four of the `sample` levels (codes 64-67) actually occur.
dat <- structure(list(measure = structure(c(1L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 2L, 3L, 1L, 12L, 13L, 14L, 15L, 16L, 17L,
18L, 19L, 1L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 2L, 3L,
4L, 5L, 1L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 2L, 3L, 4L,
5L), .Label = c("L1", "L10", "L11", "L12", "L13", "L14", "L15",
"L16", "L17", "L18", "L19", "L2", "L3", "L4", "L5", "L6", "L7",
"L8", "L9"), class = "factor"), sample = structure(c(64L, 64L,
64L, 64L, 64L, 64L, 64L, 64L, 64L, 64L, 64L, 65L, 65L, 65L, 65L,
65L, 65L, 65L, 65L, 65L, 66L, 66L, 66L, 66L, 66L, 66L, 66L, 66L,
66L, 66L, 66L, 66L, 66L, 67L, 67L, 67L, 67L, 67L, 67L, 67L, 67L,
67L, 67L, 67L, 67L, 67L), .Label = c("010719A", "010719B", "010719C",
"020419A", "020419B", "020419C", "040219A", "040219B", "040219C",
"040319A", "040319B", "040319C", "050219A", "050219B", "050219C",
"060519B", "070519A", "070519B", "070519C", "080419A", "080419B",
"080419C", "080719A", "080719B", "080719C", "090419A", "090419B",
"090419C", "100419A", "100419B", "100419C", "110219A", "110219B",
"110219C", "110319A", "110319B", "110319C", "110619A", "110619B",
"110619C", "120609A", "120609B", "120609C", "130519A", "130519B",
"130519C", "140519A", "140519B", "140519C", "150419A", "150419B",
"150419C", "170619A", "170619B", "170619C", "180219B", "180219C",
"180319A", "180319B", "180319C", "180619A", "180619B", "180619C",
"220119A", "220119C", "230119A", "230119B", "230119C", "250219A",
"250219B", "250219C", "250319A", "250319B", "250319C", "260319A",
"260319B", "260319C", "280119A", "280119B", "280119C", "290119A",
"290119B", "290119C", "300119A", "300119B", "300119C"), class = "factor"),
y = c(0, 10, 10, 13.33, 16.67, 16.67, 26.67, 13.33, 30, 36.67,
26.67, 0, 3.33, 3.33, 10, 16.67, 16.67, 3.33, 3.33, 0, 0,
0, 11.43, 20, 14.29, 14.29, 20, 14.29, 2.86, 17.14, 28.57,
34.29, 11.43, 0, 2.94, 2.94, 11.76, 20.59, 20.59, 23.53,
20.59, 14.71, 17.65, 32.35, 20.59, 8.82), x = c(0, 5.833,
8.667, 12, 14.667, 16.833, 23.667, 29.833, 32.833, 35.833,
38.583, 0, 5.833, 8.667, 12, 14.667, 16.833, 23.667, 29.833,
32.833, 0, 5.833, 8.833, 11.917, 14.667, 16.917, 23.667,
29.833, 32.833, 35.833, 38.833, 41.583, 47.833, 0, 5.833,
8.833, 11.917, 14.667, 16.917, 23.667, 29.833, 32.833, 35.833,
38.833, 41.583, 47.833)), row.names = c(NA, -46L), class = c("tbl_df",
"tbl", "data.frame"))
这是我正在使用的一个片段。这是我如何得到每个 x 和 y 值的预测。
library(tidyverse)
library(modelr)
# Fit y = alpha * x^beta * exp(-gamma * x) to the observations of one sample
# and return the fitted values at the observed x, rounded to 2 decimals.
#
# x, y: numeric vectors of equal length (time and measured value).
#
# The formula uses bare column names instead of `df$...`, so the model is tied
# to its `data` argument and predict() honours a `newdata` data frame. With
# `df$ex` in the formula, predict() always re-reads the global `df` and any
# `newdata` is silently ignored.
predict_sample <- function(x, y) {
  obs <- tibble(x = x, y = y)
  fit <- nls(y ~ alpha * x^beta * exp(-gamma * x),
             data = obs,
             start = list(alpha = 1.5, beta = 1.85, gamma = 0.095),
             control = list(maxiter = 10000))
  # newdata must be a data frame whose column names match the formula.
  round(predict(fit, newdata = obs), digits = 2)
}

# One fit per sample. group_by() + mutate() writes each prediction onto the
# row it belongs to, so there is no accumulator to grow, no dummy first row to
# drop, and no reliance on the rows of `dat` being sorted by sample.
dat <- dat %>%
  group_by(sample) %>%
  mutate(pred = predict_sample(x, y)) %>%
  ungroup()
以此制作 ggplot 会产生与上图相同的结果。简而言之,我不知道如何在图的两个或多个点之间平滑地推断(插值?),然后计算该图(线)何时处于最佳状态。有没有办法可以在点之间进行预测?它可以迭代地完成吗?我需要执行此操作的完整数据中有近 100 个样本。
解决方案
简而言之:
您可以在调用 predict 时通过 newdata 传入一个新的数据框:
# Restrict the data to the first sample that appears in `dat`.
df <- dat[dat$sample == dat$sample[1], ]

# Non-linear least-squares fit of y = alpha * x^beta * exp(-gamma * x).
nlm <- nls(
  y ~ alpha * x^beta * exp((-gamma) * x),
  data = df,
  start = list(alpha = 1.5, beta = 1.85, gamma = 0.095),
  control = list(maxiter = 10000)
)

# Evaluate the fitted curve on a dense grid (step 0.01) covering the observed
# x range; the grid is built once and reused as `newdata`.
predicted <- data.frame(x = seq(min(df$x), max(df$x), by = 0.01))
predicted$y <- predict(nlm, newdata = predicted)
这样会得到很多个点上的预测值,应该足以让您找到最大值(例如 predicted$x[which.max(predicted$y)])。但是:
- 既然已经拟合了模型,我认为更好的做法是直接利用估计出的系数通过数学运算求最大值:对 y = alpha * x^beta * exp(-gamma * x) 求导并令导数为零,可得最大值位于 x = beta / gamma(要求 beta > 0 且 gamma > 0)
- 如果您想要一些局部最大值并且无法计算导数,那么您可以尝试根据您的估计来估计导数,并找到导数的零点
推荐阅读
- flutter - 为什么我的颤振中没有 padLeft 功能?
- php - Laravel Sanctum - 通证的一般理解和耳目一新
- python - 如何在python中制作网页截图
- python - 查找列表中给出的 n 个字符串列表中有多少个字符?
- r - 如何根据其他模态字段隐藏\显示\切换R闪亮模态中的某些字段
- python - Stumpy:获得普通距离而不是 z 归一化版本的选项
- html - 如何使用 html 和 css 在不同高度和宽度的中心设置图像
- java - java-project - 可比接口
- python - pyspark 是否有 org.apache.spark.functions.transform 的等价物?
- javascript - 电子 - 在打开保存对话框之前写入文件