首页 > 解决方案 > 如何平均 7 年的数据

问题描述

我是 R 编程和这个网站的新手,如有不当之处还请多多包涵。我提取了过去 7 年中 7 个变量的大气数据:臭氧、一氧化碳、一氧化氮、二氧化氮、风速、PM 2.5 和 PM 10。我想把这些数据绘制成图,看看政府在这次 COVID-19 大流行期间强制实施的居家令是否对大气成分产生了影响。图已经画出来了,但我不太确定下一步该做什么。我想我需要的是过去 7 年的平均值?数据是按 24 小时制记录的逐小时数据。

NO.dat 数据框

dput(head(NO.dat,10))
structure(list(Date = c("3/1/2014", "3/2/2014", "3/3/2014", "3/4/2014",
"3/5/2014", "3/6/2014", "3/7/2014", "3/8/2014", "3/9/2014", "3/10/2014"
), X0.00 = c(3.6, NA, 2.3, 17.1, 0.4, 0.9, 110.9, 0.1, NA, 0.4
), X1.00 = c(6.3, NA, 1.4, 18.7, 0.2, 0.2, 15.8, 0, NA, 0.6),
X2.00 = c(2.3, 0.6, 0.4, 13.9, 0.2, 0.1, 13.5, 0, 0.8, 0.3
), X3.00 = c(0.9, 0.3, 0.9, 4.2, 0.5, 0.3, 22.7, 0.2, 0.5,
0.7), X4.00 = c(0.2, 0.1, 2.8, 5.2, 0.7, 0, 40.1, 0.1, 0.8,
2.8), X5.00 = c(0.4, 0, 4.4, 11.1, 2.4, 1.8, 22.2, 0.1, 0.8,
4.1), X6.00 = c(11.8, 0.1, 17.6, 51.8, 3.6, 8.2, 2.8, 0.3,
1, 20.1), X7.00 = c(39.5, 0.6, 30.3, 118.6, 15.7, 12, 3.7,
1, 1.9, 39.1), X8.00 = c(23.9, 0.7, 25.8, 35.6, 20.6, 11.4,
6.3, 1.5, 1.2, 33.5), X9.00 = c(8.4, 1.1, 20.8, 28.7, 5.1,
9.4, 3.7, 1.3, 0.8, 9.6), X10.00 = c(4.3, 0.5, 13.3, 17.1, 
1.1, 6, 1.3, 2.4, 1.4, 2.5), X11.00 = c(3.9, 0.3, 8.3, 13.9, 
0.5, 5.6, 0.9, 2.3, 1.3, 1.2), X12.00 = c(4.1, 0.6, 6.3, 
12.2, 0.6, 4.3, 0.8, 1.6, 1, 1.1), X13.00 = c(2.6, 0.6, 9.1, 
9, 0.6, 3.6, 0.7, 2, 1.6, 1.1), X14.00 = c(3.7, 0.5, 9.3, 
1.4, 0.9, 2.3, 0.9, 1.1, 1.1, 1.3), X15.00 = c(3.4, 0.5, 
9.4, 0.8, 0.8, 1.8, 1.2, 1.8, 1.2, 1.1), X16.00 = c(1, 0.3, 
5.7, 0.6, 2.5, 2.3, 1.1, 2.3, 1.2, 1), X17.00 = c(0.9, 0.3, 
13.4, 0.5, 3.2, 1.8, 0.7, 1.4, 0.6, 0.7), X18.00 = c(0.8, 
0.2, 22.1, 0.5, 3.9, 0.7, 0.7, 0.9, 0.4, 0.5), X19.00 = c(0.5, 
0.2, 24.2, 1.8, 15.4, 1.1, 0.1, 0.8, 10.1, 0.6), X20.00 = c(0.5, 
1, 18.4, 17.1, 5.1, 33.4, 0.3, 0.1, 45.3, 0.5), X21.00 = c(1, 
0.5, 15.4, 55.7, 2, 39.5, 4.1, 0, 49.5, 0.4), X22.00 = c(0.4, 
0.2, 8.1, 52.6, 2.7, 25.2, 0.9, 0.3, 27.2, 0.5), X23.00 = c(0.4, 
6, 11.9, 2.2, 2.5, 62.1, 0.2, 0.1, 3.3, 0.4)), row.names = c(NA, 
10L), class = "data.frame")

NO2.dat 数据框

dput(head(NO2.dat,10))
structure(list(Date = c("3/1/2014", "3/2/2014", "3/3/2014", "3/4/2014", 
"3/5/2014", "3/6/2014", "3/7/2014", "3/8/2014", "3/9/2014", "3/10/2014"
), X0.00 = c(5, 0.5, 3.2, 16.3, 0.4, 2, 91.2, 0.2, 0.5, 0.2),
X1.00 = c(7, 0.4, 2.4, 18.4, 0.3, 0.6, 17.7, 0.2, 0.5, 0.1
), X2.00 = c(1.7, 0.4, 0.3, 16.3, 0.1, 0.4, 10.3, 0.3, 0.6, 
0.2), X3.00 = c(0.8, 0.6, 0.7, 4.4, 0.8, 0.6, 8.5, 0.4, 0.5, 
0.6), X4.00 = c(0.6, 0.2, 2.6, 4.4, 1, 0.6, 43.7, 0.3, 0.7, 
2.6), X5.00 = c(0.6, 0.3, 5, 12.8, 2.7, 2.8, 15.7, 0.4, 0.7, 
4.3), X6.00 = c(5.8, 0.4, 18.6, 60.5, 3.8, 9.5, 3, 0.6, 0.9, 
22.1), X7.00 = c(32, 0.7, 27.4, 117.5, 15.3, 12.6, 4.4, 1.7, 
2.2, 36.2), X8.00 = c(21.3, 1, 22.7, 37.1, 20.3, 12.5, 7.6, 
2.1, 1.4, 33.2), X9.00 = c(7.9, 1.4, 19.4, 28.7, 5, 10.5, 
4.8, 2.3, 0.9, 11), X10.00 = c(4.2, 0.6, 12.4, 19, 1.6, 8.1, 
1.9, 3.1, 1.8, 2.9), X11.00 = c(4.2, 0.8, 9.6, 15.7, 1.1, 
7.4, 1.6, 3.4, 1.8, 1), X12.00 = c(4.2, 0.9, 6.6, 14.2, 1.2, 
6.1, 1.4, 2.7, 1.3, 1.2), X13.00 = c(NA, 0.8, 9.4, NA, 1.4, 
3.9, 1.2, NA, 1.9, 1.2), X14.00 = c(NA, 0.9, 9.6, NA, 1.9, 
3.1, 1.3, NA, 1.3, 1.3), X15.00 = c(NA, 0.9, 9.6, NA, 1.7, 
2.9, 1.9, NA, 1.6, 1), X16.00 = c(1, 0.8, 6.4, 1.2, 3.8, 
3, 1.8, 3, 1.2, 1.1), X17.00 = c(1.2, 0.7, 12, 1, 4, 1.5, 
1.5, 2, 0.5, 0.6), X18.00 = c(0.9, 0.5, 20.2, 0.9, 5, 1, 
1.3, 1.5, 0.3, 0.3), X19.00 = c(0.5, 0.5, 19.1, 2.1, 15.8, 
1.1, 0.6, 1.3, 5.1, 0.4), X20.00 = c(0.4, 1.1, 17.5, 7, 4.2, 
24.9, 0.5, 0.7, 32.1, 0.4), X21.00 = c(0.7, 0.7, 13.3, 28.4, 
2.4, 31.7, 3.4, 0.7, 37, 0.3), X22.00 = c(0.4, 0.4, 7.3, 
21, 2.9, 18.5, 1.2, 0.6, 20, 0.3), X23.00 = c(0.4, 5.8, 11.6, 
0.8, 2.9, 47.8, 0.5, 0.6, 2.1, 0.2)), row.names = c(NA, 10L
), class = "data.frame")

任何帮助我都将非常感激!

library(reshape2)
library(dplyr)
library(lubridate)
library(ggplot2)

# Build an hourly NOx (= NO + NO2) time series from the wide NO.dat / NO2.dat
# tables, smooth it with loess, and plot it with the two stay-home orders
# marked.

# Keep only the first 25 columns (Date plus the 24 hourly columns); this
# removes the trailing summary-statistic columns.
NO.dat <- NO.dat[, 1:25]
NO2.dat <- NO2.dat[, 1:25]

# Reshape each wide table (one column per hour) to long format: one row per
# Date/Hour pair.
x <- melt(NO.dat, id = "Date")
colnames(x) <- c("Date", "Hour", "NO")
# melt() returns the former column names (X0.00 ... X23.00) as a factor whose
# integer codes run 1..24 in column order; subtracting 1 gives hour 0..23.
x$Hour <- as.numeric(x$Hour) - 1

y <- melt(NO2.dat, id = "Date")
colnames(y) <- c("Date", "Hour", "NO2")
y$Hour <- as.numeric(y$Hour) - 1

# NOTE(review): this positional copy assumes NO.dat and NO2.dat contain the
# same dates in the same row order -- confirm, or merge on Date/Hour instead.
x$NO2 <- y$NO2

# Build a "m/d/Y H:00" string per row and parse it into a date-time.
x$DT <- paste(x$Date, paste0(x$Hour, ":00"))
x <- x %>%
  select(DT, NO, NO2) %>%
  mutate(NOx = NO + NO2, DT = mdy_hm(DT)) %>%
  arrange(DT)

# Quick look at the raw series.
p <- ggplot(x, aes(x = DT, y = NOx)) + geom_line() + xlab("")

# Index the rows BEFORE dropping missing values so that gaps in the record
# keep their width on the smoothing axis, then fit loess on the complete rows
# only. predict() then returns exactly one fitted value per remaining row.
x$index <- seq_len(nrow(x))
x <- na.omit(x)
loessMod10 <- loess(NOx ~ index, data = x, span = 0.10)
x$smoothed10 <- predict(loessMod10)

# mdy_hm() returns UTC date-times by default, so the annotation timestamps
# are created in UTC as well to line up with the data's clock.
stay_home_1 <- as.POSIXct("2020-03-24 17:00:00", tz = "UTC")
stay_home_2 <- as.POSIXct("2020-04-01 17:00:00", tz = "UTC")

#pdf("El Paso NOx.pdf",w=6,h=3,useDingbats= FALSE)
p <- ggplot(x) +
  geom_line(aes(x = DT, y = NOx), linetype = "dashed", size = 0.3) +
  geom_line(aes(x = DT, y = smoothed10), color = "red") +
  xlab("") +
  labs(y = "NOx \n(ppbv)") +
  ggtitle("NOx concentrations at Chamizal TCEQ Site") +
  theme(plot.title = element_text(hjust = 0.5)) +
  annotate("text", x = stay_home_1, y = 130,
           label = "Stay Home 1", angle = 90, size = 2.5) +
  annotate("text", x = stay_home_2, y = 130,
           label = "Stay Home 2", angle = 90, size = 2.5) +
  annotate("segment", x = stay_home_1, xend = stay_home_1,
           y = 0, yend = 105, colour = "blue") +
  annotate("segment", x = stay_home_2, xend = stay_home_2,
           y = 0, yend = 105, colour = "blue")

我认为需要求平均的图表?

标签: r ggplot2 dplyr lubridate reshape2

解决方案


也许您可以添加一个 year 列,并用它按年份对数据进行分组。然后,您就可以把各年份的数据叠加在一起绘制。

出于兴趣,我尝试在不使用 reshape2 的情况下重现这张图。

library(dplyr)
library(tidyr)
library(stringr)
library(ggplot2)

# Reproduce the NO / NO2 time-series plot with tidyr instead of reshape2.
# NOTE(review): NO and NO2 are presumably the wide data frames from the
# question (NO.dat / NO2.dat: a Date column plus hourly X0.00 ... X23.00
# columns) -- confirm they are defined under these names.

# Stack the hourly columns of each table into long format.
NO_stacked <- NO %>%
  pivot_longer(
    cols = starts_with("X"),
    names_to = "hours",
    values_to = "NO"
  )
NO2_stacked <- NO2 %>%
  pivot_longer(
    cols = starts_with("X"),
    names_to = "hours",
    values_to = "NO2"
  )

# Combine into one data frame by matching on Date and hour. A keyed join is
# used instead of bind_cols(): both inputs carry Date and hours columns, and
# bind_cols() in dplyr >= 1.0 renames such duplicates (Date...1, Date...4),
# which makes the select() below fail. Joining also removes the need for the
# two tables to be in identical row order.
data <- full_join(NO_stacked, NO2_stacked, by = c("Date", "hours")) %>%
  select(Date, hours, NO, NO2)

# Turn the date string into a POSIXct timestamp, shift it by the hour of day
# (parsed from names such as "X13.00" by dropping the leading "X"), then
# reshape so that each contaminant becomes its own group of rows.
data <- data %>%
  mutate(
    Date = as.POSIXct(Date, format = "%m/%d/%Y", tz = "UTC"),
    hours = as.numeric(str_sub(hours, start = 2, end = -1)),
    Date = Date + 60 * 60 * hours
  ) %>%
  select(-hours) %>%
  pivot_longer(
    cols = c(NO, NO2),
    names_to = "Contaminant",
    values_to = "Concentration"
  )

# Plot: raw series plus a loess smoother, one panel per contaminant.
ggplot(data = data, aes(x = Date, y = Concentration)) +
  geom_line() +
  geom_smooth(method = "loess", formula = y ~ x) +
  facet_wrap(vars(Contaminant), nrow = 2) +
  ggtitle("NOx concentrations at Chamizal TCEQ Site")

在此处输入图像描述


推荐阅读