首页 > 解决方案 > 在 R 中,如何处理含有大量 NA 的数据并计算平均值?

问题描述

我正在使用 RStudio,手头有一个非常大的数据集,其中包含 4 个变量。我想计算 O2 列在每天 06:30:00 到 19:00:00 这一时间段内的平均值。数据集的示例如下所示:

# Read the CSV export: skip the first two lines, take the next line as the
# header, and treat both empty cells and the literal "NA" as missing values.
RBWQ <- read.csv(
  ".../R/WY06 to WY19.csv",
  skip = 2,
  header = TRUE,
  sep = ",",
  na.strings = c("", "NA")
)
# Replace the file's column headers with short, fixed names.
names(RBWQ) <- c("datetime", "NDCQ", "O2", "SpCond", "PaseoQ")
# Open the data frame in the interactive viewer.
View(RBWQ)

    datetime              NDCQ O2   SpCond PaseoQ
1   2005-10-01 00:00:00   NA   NA   NA     827
2   2005-10-01 00:15:00   NA   NA   NA     857
3   2005-10-01 00:30:00   NA   NA   NA     857
4   2005-10-01 00:45:00   NA   NA   NA     807
5   2005-10-01 01:00:00   NA   10   300    797
6   2005-10-01 01:15:00   NA   10.3 319    817
7   2005-10-01 01:30:00   NA   10   401    797
8   2005-10-01 01:45:00   NA   10.2 398    788
9   2005-10-01 02:00:00   NA   NA   NA     788
10  2005-10-01 02:15:00   NA   NA   NA     807

数据每 15 分钟记录一次,如您所见,此数据集中有很多 NA。我该如何处理这个数据集,以计算每天特定时间段内的平均值,并让 R 生成一张包含日期和对应平均值的表?

dput(head(RBWQ))

    structure(list(datetime = structure(list(sec = c(0, 0, 0, 0, 
0, 0), min = c(0L, 15L, 30L, 45L, 0L, 15L), hour = c(0L, 0L, 
0L, 0L, 1L, 1L), mday = c(1L, 1L, 1L, 1L, 1L, 1L), mon = c(9L, 
9L, 9L, 9L, 9L, 9L), year = c(105L, 105L, 105L, 105L, 105L, 105L
), wday = c(6L, 6L, 6L, 6L, 6L, 6L), yday = c(273L, 273L, 273L, 
273L, 273L, 273L), isdst = c(1L, 1L, 1L, 1L, 1L, 1L), zone = c("MDT", 
"MDT", "MDT", "MDT", "MDT", "MDT"), gmtoff = c(NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_)), class = c("POSIXlt", 
"POSIXt")), NDCQ = c(NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_), O2 = c(NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_), SpCond = c(NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_), PaseoQ = c(827L, 857L, 857L, 807L, 
797L, 817L)), row.names = c(NA, 6L), class = "data.frame")

在某些日子里,仪器可能因各种原因(例如被掩埋)而读数不正确,因此可能出现一整天都是 NA 的情况。

dput(head(RBWQ, 96))
    structure(list(datetime = structure(list(sec = c(0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0), min = c(0L, 15L, 30L, 45L, 0L, 15L, 
    30L, 45L, 0L, 15L, 30L, 45L, 0L, 15L, 30L, 45L, 0L, 15L, 30L, 
    45L, 0L, 15L, 30L, 45L, 0L, 15L, 30L, 45L, 0L, 15L, 30L, 45L, 
    0L, 15L, 30L, 45L, 0L, 15L, 30L, 45L, 0L, 15L, 30L, 45L, 0L, 
    15L, 30L, 45L, 0L, 15L, 30L, 45L, 0L, 15L, 30L, 45L, 0L, 15L, 
    30L, 45L, 0L, 15L, 30L, 45L, 0L, 15L, 30L, 45L, 0L, 15L, 30L, 
    45L, 0L, 15L, 30L, 45L, 0L, 15L, 30L, 45L, 0L, 15L, 30L, 45L, 
    0L, 15L, 30L, 45L, 0L, 15L, 30L, 45L, 0L, 15L, 30L, 45L), hour = c(0L, 
    0L, 0L, 0L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 
    4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 8L, 
    8L, 8L, 8L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 
    11L, 12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L, 14L, 14L, 14L, 14L, 
    15L, 15L, 15L, 15L, 16L, 16L, 16L, 16L, 17L, 17L, 17L, 17L, 18L, 
    18L, 18L, 18L, 19L, 19L, 19L, 19L, 20L, 20L, 20L, 20L, 21L, 21L, 
    21L, 21L, 22L, 22L, 22L, 22L, 23L, 23L, 23L, 23L), mday = c(1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), 
        mon = c(9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 
        9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 
        9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 
        9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 
        9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 
        9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 
        9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L), year = c(105L, 105L, 105L, 
        105L, 105L, 105L, 105L, 105L, 105L, 105L, 105L, 105L, 105L, 
        105L, 105L, 105L, 105L, 105L, 105L, 105L, 105L, 105L, 105L, 
        105L, 105L, 105L, 105L, 105L, 105L, 105L, 105L, 105L, 105L, 
        105L, 105L, 105L, 105L, 105L, 105L, 105L, 105L, 105L, 105L, 
        105L, 105L, 105L, 105L, 105L, 105L, 105L, 105L, 105L, 105L, 
        105L, 105L, 105L, 105L, 105L, 105L, 105L, 105L, 105L, 105L, 
        105L, 105L, 105L, 105L, 105L, 105L, 105L, 105L, 105L, 105L, 
        105L, 105L, 105L, 105L, 105L, 105L, 105L, 105L, 105L, 105L, 
        105L, 105L, 105L, 105L, 105L, 105L, 105L, 105L, 105L, 105L, 
        105L, 105L, 105L), wday = c(6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 
        6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 
        6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 
        6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 
        6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 
        6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 
        6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L), yday = c(273L, 
        273L, 273L, 273L, 273L, 273L, 273L, 273L, 273L, 273L, 273L, 
        273L, 273L, 273L, 273L, 273L, 273L, 273L, 273L, 273L, 273L, 
        273L, 273L, 273L, 273L, 273L, 273L, 273L, 273L, 273L, 273L, 
        273L, 273L, 273L, 273L, 273L, 273L, 273L, 273L, 273L, 273L, 
        273L, 273L, 273L, 273L, 273L, 273L, 273L, 273L, 273L, 273L, 
        273L, 273L, 273L, 273L, 273L, 273L, 273L, 273L, 273L, 273L, 
        273L, 273L, 273L, 273L, 273L, 273L, 273L, 273L, 273L, 273L, 
        273L, 273L, 273L, 273L, 273L, 273L, 273L, 273L, 273L, 273L, 
        273L, 273L, 273L, 273L, 273L, 273L, 273L, 273L, 273L, 273L, 
        273L, 273L, 273L, 273L, 273L), isdst = c(1L, 1L, 1L, 1L, 
        1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
        1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
        1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
        1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
        1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
        1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
        1L, 1L), zone = c("MDT", "MDT", "MDT", "MDT", "MDT", "MDT", 
        "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", 
        "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", 
        "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", 
        "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", 
        "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", 
        "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", 
        "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", 
        "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", 
        "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", 
        "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT", "MDT"
        ), gmtoff = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
        NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
        NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
        NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
        NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
        NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
        NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
        NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
        NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
        NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
        NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
        NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
        NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
        NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
        NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
        NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
        NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
        NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
        NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
        NA_integer_, NA_integer_)), class = c("POSIXlt", "POSIXt"
    )), NDCQ = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
    ), O2 = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), 
        SpCond = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
        NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
        NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
        NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
        NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
        NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
        NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
        NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
        NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
        NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
        NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
        NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
        NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
        NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
        NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
        NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
        NA_real_), PaseoQ = c(827L, 857L, 857L, 807L, 797L, 817L, 
        797L, 788L, 788L, 807L, 788L, 797L, 807L, 797L, 778L, 797L, 
        807L, 797L, 750L, 741L, 750L, 714L, 714L, 696L, 696L, 687L, 
        687L, 678L, 678L, 661L, 669L, 661L, 678L, 678L, 696L, 661L, 
        644L, 635L, 661L, 644L, 644L, 635L, 635L, 627L, 635L, 652L, 
        652L, 652L, 644L, 635L, 644L, 652L, 661L, 652L, 661L, 635L, 
        661L, 644L, 652L, 635L, 644L, 619L, 619L, 619L, 635L, 635L, 
        635L, 652L, 635L, 652L, 652L, 661L, 652L, 652L, 652L, 652L, 
        661L, 652L, 644L, 627L, 619L, 635L, 619L, 619L, 603L, 611L, 
        603L, 603L, 619L, 635L, 635L, 635L, 627L, 619L, 635L, 619L
        )), row.names = c(NA, 96L), class = "data.frame")

标签: r, mean

解决方案


对于时间序列数据,可以考虑使用 xts 包。先根据您的数据创建一个时间序列(以第一列的日期时间作为索引),然后按时间范围取子集,并使用 colMeans 计算每日均值(去除 NA)。如果这正是您想要的,请告诉我。

library(xts)

# Build the time series: every column except the first is the data,
# and the first column (the datetime) is the time index.
RBWQ_xts <- xts(x = RBWQ[, -1], order.by = RBWQ[, 1])

# Keep only the 06:30:00-19:00:00 window of each day, then take the
# per-day column means with NAs dropped.
apply.daily(
  RBWQ_xts["T06:30:00/T19:00:00"],
  FUN = colMeans,
  na.rm = TRUE
)

推荐阅读