r - 如何使用 30 年平均值计算温度偏离正常值 - R 4.0.0
问题描述
我在下面添加了我的整个代码,所以我验证的是有效的:
我知道我可以在我想要的特定时间范围内从 NCDC 网站下载任何气候站。附带说明一下,如果您可以查看我的“bind_rows()”命令并使其不那么混乱,那么我找不到更好的方法来做到这一点。
我知道
TAVG
是计算和工作每月摘要,这使得 data.set mso_sum 完美运行
那么什么不起作用:
- 发现我偏离了 30 年的规范
我希望它如何工作:
- 过滤年份 1981:2010
- 分组,因此可以按天汇总每个 1 月 1 日、1 月 2 日、1 月 3 日等、2 月 1 日、2 月 2 日等
- 总结平均 TAVG(从 MaxT 和 MinT 中找到的温度平均值)
- 然后取整个数据集并从 CliAvgT 中减去每日 TAVG
这是我试过的代码:
mso_light %>%
group_by(month, day) %>%
summarise(CliAvgT = mean(TAVG[1981:2010], na.rm = T)) %>%
mutate(Avg_DepT = CliAvgT - TAVG) %>%
ungroup()
我也试过这个备用代码:
mso_light %>%
filter(year >= "1981", year <= "2010") %>%
group_by(month, day) %>%
summarise(CliAvgT = mean(TAVG, na.rm = T)) %>%
mutate(Avg_DepT = CliAvgT - TAVG) %>%
ungroup()
并收到此错误消息:
> mso_light %>%
Warning messages:
1: Unknown or uninitialised column: `value`.
2: Unknown or uninitialised column: `value`.
+ filter(year >= "1981", year <= "2010") %>%
+ group_by(month, day) %>%
+ summarise(CliAvgT = mean(TAVG, na.rm = T)) %>%
+ mutate(Avg_DepT = CliAvgT - TAVG) %>%
+ ungroup()
`summarise()` regrouping output by 'month' (override with `.groups` argument)
Error: Problem with `mutate()` input `Avg_DepT`.
x object 'TAVG' not found
i Input `Avg_DepT` is `CliAvgT - TAVG`.
i The error occured in group 1: month = 1.
Run `rlang::last_error()` to see where the error occurred.
最后这是我的所有代码:
library(rnoaa)
library(tidyverse)
library(data.table)
library("openair")
library("chron")
library('lubridate')
## grab first half of year
getNoaaP1 <- function(yr, type = c('tmax','tmin','PRCP', 'SNOW', 'SNWD'))
ncdc(datasetid = 'GHCND',
stationid = 'GHCND:USW00024153',
datatypeid = type,
startdate = paste0(yr, '-01-01'),
enddate = paste0(yr, '-06-30'), limit = 1000)
## grab second half of year
getNoaaP2 <- function(yr, type = c('tmax','tmin','PRCP', 'SNOW', 'SNWD'))
ncdc(datasetid = 'GHCND',
stationid = 'GHCND:USW00024153',
datatypeid = type,
startdate = paste0(yr, '-07-01'),
enddate = paste0(yr, '-12-31'), limit = 1000)
res1 <- setNames(lapply(1948:2020, getNoaaP1), paste0("Year", 1948:2020, "P1"))
res <- setNames(lapply(1948:2020, getNoaaP2), paste0("Year", 1948:2020, "P2"))
# this would export all individual list elements to the global environment:
list2env(res, envir = .GlobalEnv)
list2env(res1, envir = .GlobalEnv)
# this would combine the individual lists
mso <- bind_rows(Year1948P1$data, Year1949P1$data, Year1950P1$data, Year1951P1$data,
Year1952P1$data, Year1953P1$data, Year1954P1$data, Year1955P1$data,
Year1956P1$data, Year1957P1$data, Year1958P1$data, Year1959P1$data,
Year1960P1$data, Year1961P1$data, Year1962P1$data, Year1963P1$data,
Year1964P1$data, Year1965P1$data, Year1966P1$data, Year1967P1$data,
Year1968P1$data, Year1969P1$data, Year1970P1$data, Year1971P1$data,
Year1972P1$data, Year1973P1$data, Year1974P1$data, Year1975P1$data,
Year1976P1$data, Year1977P1$data, Year1978P1$data, Year1979P1$data,
Year1980P1$data, Year1981P1$data, Year1982P1$data, Year1983P1$data,
Year1984P1$data, Year1985P1$data, Year1986P1$data, Year1987P1$data,
Year1988P1$data, Year1989P1$data, Year1990P1$data, Year1991P1$data,
Year1992P1$data, Year1993P1$data, Year1994P1$data, Year1995P1$data,
Year1996P1$data, Year1997P1$data, Year1998P1$data, Year1999P1$data,
Year2000P1$data, Year2001P1$data, Year2002P1$data, Year2003P1$data,
Year2004P1$data, Year2005P1$data, Year2006P1$data, Year2007P1$data,
Year2008P1$data, Year2009P1$data, Year2010P1$data, Year2011P1$data,
Year2012P1$data, Year2013P1$data, Year2014P1$data, Year2015P1$data,
Year2016P1$data, Year2017P1$data, Year2018P1$data, Year2019P1$data,
Year2020P1$data,
Year1948P2$data, Year1949P2$data, Year1950P2$data, Year1951P2$data,
Year1952P2$data, Year1953P2$data, Year1954P2$data, Year1955P2$data,
Year1956P2$data, Year1957P2$data, Year1958P2$data, Year1959P2$data,
Year1960P2$data, Year1961P2$data, Year1962P2$data, Year1963P2$data,
Year1964P2$data, Year1965P2$data, Year1966P2$data, Year1967P2$data,
Year1968P2$data, Year1969P2$data, Year1970P2$data, Year1971P2$data,
Year1972P2$data, Year1973P2$data, Year1974P2$data, Year1975P2$data,
Year1976P2$data, Year1977P2$data, Year1978P2$data, Year1979P2$data,
Year1980P2$data, Year1981P2$data, Year1982P2$data, Year1983P2$data,
Year1984P2$data, Year1985P2$data, Year1986P2$data, Year1987P2$data,
Year1988P2$data, Year1989P2$data, Year1990P2$data, Year1991P2$data,
Year1992P2$data, Year1993P2$data, Year1994P2$data, Year1995P2$data,
Year1996P2$data, Year1997P2$data, Year1998P2$data, Year1999P2$data,
Year2000P2$data, Year2001P2$data, Year2002P2$data, Year2003P2$data,
Year2004P2$data, Year2005P2$data, Year2006P2$data, Year2007P2$data,
Year2008P2$data, Year2009P2$data, Year2010P2$data, Year2011P2$data,
Year2012P2$data, Year2013P2$data, Year2014P2$data, Year2015P2$data,
Year2016P2$data, Year2017P2$data, Year2018P2$data, Year2019P2$data,
Year2020P2$data)
## build data.frame and remove 'station ID' column
mso_light <- mso[, -3]
## remove time from date group
mso_date <- mso_light[1]
mso_date <- sub("T.*", "", mso_date$date)
mso_light$date <- mso_date
## remove flags for fl_so? and fl_t (time)
mso_light <- mso_light[1:5]
## Change 'T' = 9998 & 'M' = 9999
mso_light$value[mso_light$fl_m == "T"] <- 0
mso_light$value[mso_light$fl_q == "M"] <- 'na'
mso_light$value <- as.numeric(mso_light$value)
## pivot data frame
## eventually use to change column names
## v_names <- c('PRCP', 'SNOW', 'SNWD', 'TMAX', 'TMIN')
mso_light <- mso_light[1:3]
mso_light <- pivot_wider(mso_light,
names_from = datatype,
values_from = value)
## mso_light <- select(mso_light, -c('fl_m','fl_q'))
options(stringAsFactors = FALSE)
mso_light$date <- as.Date(mso_light$date, "%Y-%m-%d")
## Turning all daily temperatures into an average
mso_light <- mso_light %>% rowwise() %>%
mutate(TAVG = mean(c(TMAX, TMIN), na.rm = T))
## Composing daily data into monthly packages
mso_light <- mso_light %>%
mutate(month = month(date)) %>%
mutate(year = year(date)) %>%
mutate(day = day(date))
mso_light <- mso_light %>%
relocate('year', 'month', 'day')
## mso_light <- mso_light[-4]
mso_sum <- mso_light %>%
group_by(month, year) %>%
summarize(AVG_TAVG=mean(TAVG, na.rm = TRUE),
T_PRCP=sum(PRCP, na.rm=TRUE),
T_SNOW=sum(SNOW, na.rm=TRUE)) %>%
ungroup()
## make 30 year averages, using 1981-2010
mso_light %>%
group_by(month, day) %>%
summarise(CliAvgT = mean(TAVG[1981:2010], na.rm = T)) %>%
mutate(Avg_DepT = CliAvgT - TAVG) %>%
ungroup()
##mso_DeptT <- mso_light %>%
## group_by(month, day) %>%
## mean(mso_light$TAVG[1981:2010], na.rm = T) %>%
## ungroup()
##mso_DeptT <- filter(mso_light, year >= "1981", year <= "2010") %>%
## group_by(day, month) %>%
## mutate(daily_DeptT = mean(TAVG, na.rm = T)) %>%
## ungroup()
cli_Avg <- filter(mso_sum, year >= "1981", year <= "2010") %>%
group_by(month) %>%
summarize(T_dep = mean(AVG_TAVG, na.rm = T),
Mon_Precip = mean(T_PRCP, na.rm = T),
Mon_Snow = mean(T_SNOW, na.rm = T))
write.csv(mso_light, "mso_light.csv")
write.csv(mso_sum, "mso_sum.csv")
write.csv(cli_Avg, "cli_avg.csv")
解决方案
使用data.table
您的代码可以这样编写。问题还在于,summary 删除了所有其他列,而且您应该像这样使用它summarise(CliAvgT = mean(TAVG, na.rm = T))
整洁的方式
mso_light %>%
group_by(month, day) %>%
summarise(CliAvgT = mean(TAVG, na.rm = T)) %>%
ungroup() %>% right_join(mso_light) %>%
mutate(Avg_DepT = CliAvgT - TAVG)
使用data.table
语法
这是您的代码的完整重新制定:
# rbinding all the data into one data.table
# lapply(c(res,res1),`[[`, "data") <=> lapply(c(res,res1),function(x) x[["data"]])
mso <- rbindlist(lapply(c(res,res1),`[[`, "data"))
# remove the station column
# remove time from date
mso[,c("station", "date"):=list(NULL, as.Date(sub("T.+$", "", date)))]
# dcast <=> pivot_wider
# mso[,1:3] get the first three columns
# and pivot them wider
# pivot_wider(names_from, values_from) <=> dcast(., idcol ~ names_from, value.var=values_from)
mso_light <- dcast(mso[,1:3], date ~ datatype, value.var = "value")
# calculate TAVG as we only have two values => TAVG= (TMAX+TMIN)/2
# then create the year, month day columns
mso_light[, TAVG:= (TMAX+TMIN)/2][, c("year", "month", "day") := list(year(date),month(date),day(date))]
# create a new data.table that contains the year, month and the average TAVG the sums of PRCP and SNOW by month and year
mso_sum <- mso_light[,list(year, month, AVG_TAVG=mean(TAVG, na.rm = TRUE),
T_PRCP=sum(PRCP, na.rm=TRUE),
T_SNOW=sum(SNOW, na.rm=TRUE)), by=c("year","month")]
#
mso_light[, CliAvgT := mean(TAVG, na.rm = T), by=c("month", "day")][, Avg_DepT := CliAvgT - TAVG]
cli_Avg <- mso_sum[year>=1981 & year<=2020, list(T_dep = mean(AVG_TAVG, na.rm = T),
Mon_Precip = mean(T_PRCP, na.rm = T),
Mon_Snow = mean(T_SNOW, na.rm = T)), by=month ]
一些解释:
基本上使用data.table
有[
一些新的论点。我将尝试做一个简洁的介绍:
df[, new.column:=old.col*2]
: 创建 new.column 并将其添加到当前df
<=>
df$new.column <- df$old.col*2
df[col<0]
返回验证col<0
ie的行df[df$col<0]
- 您可以将两者混合使用,它基本上只会修改验证条件的行。
by
论点:基本上按提供的列对数据进行分组df[, , by=col]<=> df %>% group_by(col)
- 最后一个警告是
df[, c(col1,col2)]
将返回一个包含这些列中的值的向量,使用list
而不是c
将返回一个包含列值的 data.table。
推荐阅读
- python - 如何使用张量流模型预测新输入
- java - 当用户答错问题时,我该如何结束这个游戏?
- php - Laravel7 CORS:被 CORS 策略阻止:对预检请求的响应未通过访问控制检查:没有“Access-Control-Allow-Origin”
- react-native - React Navigation 5.x 嵌套导航抽屉未从菜单按钮打开
- go - 如何将 Go 的 GC CPU 使用率提高到 25% 以上?
- fortran - 即使某些等级不参与,MPI_IBcast 是否也能保证发送
- python - 当我尝试从 PIL 调整 img 的大小时,不能将序列乘以“float”类型的非整数
- javascript - 如何在括号编辑器中禁用 javascript 代码提示?
- android - android:onClick 按钮工作缓慢
- ios - 从单独的类运行方法时返回 Nil