r - 根据另一个数据框中的分组变量和日期,将一个数据框中的 NA 替换为相应的因子值
问题描述
我正在使用两个不同的数据框,它们的结构不同但包含相同的信息。这是第一个模拟数据框:-
# Ninety user ids: the same six ids repeated in a fixed order, fifteen times,
# so that every user ends up with fifteen events.
UserId <- rep(
  c("n4RhHC", "k6qjRM", "nPUYOw", "Bm77mQ", "i95C1a", "iX45wN"),
  times = 15L
)
# Event timestamps, one per element of UserId (90 values, strictly increasing).
# The numbers are seconds since 1970-01-01 00:00:00 UTC; the POSIXct/POSIXt
# class makes R treat them as date-times. tzone = "" stores no explicit time
# zone, so printed values depend on the time zone of the current session.
# NOTE(review): the sample output shown for head(df) appears to have been
# produced in a UTC session -- confirm before comparing printed times.
eventDateTime<-structure(c(1420081869.97425, 1420172875.75246, 1420267947.2653,
1420368834.57653, 1420424712.65942, 1420541210.43439, 1420629609.9716,
1420703746.46464, 1420788777.7267, 1420850669.16688, 1420943298.10164,
1421028427.25171, 1421136879.38698, 1421210193.28063, 1421313257.14934,
1421387900.60726, 1421483801.11956, 1421582050.3433, 1421642017.51975,
1421745585.63356, 1421838779.26598, 1421893964.55692, 1421999352.30669,
1422063023.98015, 1422155543.93289, 1422247080.1288, 1422317378.46239,
1422419719.15975, 1422527170.64454, 1422590703.07666, 1422683225.86099,
1422774701.24366, 1422856520.98446, 1422929644.60038, 1423043742.52736,
1423123277.76309, 1423215111.16198, 1423271863.16464, 1423384864.31287,
1423457767.05536, 1423561864.87991, 1423640753.00037, 1423733022.69533,
1423809491.16866, 1423894883.88586, 1423992500.18921, 1424045807.90794,
1424151816.33881, 1424249235.95351, 1424333926.00324, 1424411033.16768,
1424514004.2494, 1424582125.79503, 1424660175.24237, 1424739053.33484,
1424826696.93812, 1424922462.93775, 1425017605.00017, 1425110198.6193,
1425185575.06409, 1425293836.23993, 1425353483.6657, 1425447031.63937,
1425527959.44992, 1425628117.60418, 1425697546.32493, 1425793473.15473,
1425892304.62097, 1425949239.4667, 1426069813.88146, 1426133047.95091,
1426241063.82313, 1426306176.72673, 1426392019.07701, 1426484578.37379,
1426588942.96811, 1426674139.46513, 1426740047.54828, 1426843180.25419,
1426937498.69748, 1427001177.28974, 1427099580.63412, 1427172479.75674,
1427255655.21296, 1427360706.16479, 1427423156.30542, 1427531520.43681,
1427592457.09099, 1427684205.1038, 1427766190.74919), class = c("POSIXct",
"POSIXt"), tzone = "")
# Outcome of each event, as a factor literal in dput() form (90 values).
# The integer codes 1-4 index into .Label, i.e. 1 = "Outcome_A",
# 2 = "Outcome_B", 3 = "Outcome_C", 4 = "Outcome_D".
# Besides "factor" the object carries an extra "variable" class and a
# varname attribute.
# NOTE(review): these extras presumably come from the package that generated
# the mock data -- confirm; nothing in this snippet reads them.
eventOutcome<-structure(c(1L, 3L, 2L, 4L, 3L, 1L, 1L, 4L, 2L, 1L, 4L, 4L, 4L,
1L, 1L, 1L, 3L, 2L, 1L, 1L, 3L, 3L, 3L, 2L, 2L, 2L, 4L, 3L, 4L,
2L, 2L, 4L, 3L, 3L, 4L, 3L, 1L, 2L, 2L, 1L, 3L, 3L, 2L, 3L, 2L,
1L, 4L, 2L, 4L, 1L, 4L, 3L, 3L, 4L, 1L, 4L, 2L, 4L, 1L, 3L, 2L,
3L, 3L, 1L, 1L, 2L, 2L, 2L, 4L, 4L, 1L, 1L, 1L, 2L, 1L, 3L, 2L,
3L, 3L, 2L, 3L, 3L, 4L, 1L, 1L, 4L, 2L, 4L, 2L, 1L), .Label = c("Outcome_A",
"Outcome_B", "Outcome_C", "Outcome_D"), class = c("variable",
"factor"), varname = "Factor")
# Build the event-level data frame (one row per event) and sort it so that
# each user's events appear together in chronological order.
# dplyr supplies both the %>% pipe and arrange(); without loading it the
# snippet fails in a fresh R session.
library(dplyr)

df <- data.frame(UserId, eventDateTime, eventOutcome)
df <- df %>%
  arrange(UserId, eventDateTime)
head(df)
# UserId eventDateTime eventOutcome
#1 Bm77mQ 2015-01-04 10:53:54 Outcome_D
#2 Bm77mQ 2015-01-10 00:44:29 Outcome_A
#3 Bm77mQ 2015-01-16 05:58:20 Outcome_A
#4 Bm77mQ 2015-01-22 02:32:44 Outcome_C
#5 Bm77mQ 2015-01-28 04:35:19 Outcome_C
#6 Bm77mQ 2015-02-03 02:14:04 Outcome_C
这是另一个数据框:-
# Inputs for the second (interval-level) data frame: three users with three
# intervals each. These assignments overwrite the UserId and eventOutcome
# vectors used for the first data frame.
UserId <- rep(c("n4RhHC", "nPUYOw", "iX45wN"), each = 3L)

# Start of each interval, as "YYYY-MM-DD HH:MM:SS" text.
Firstime <- c(
  "2015-01-01 03:11:09",
  "2015-01-19 04:33:37",
  "2015-02-18 08:47:15",
  "2015-01-21 11:12:59",
  "2015-02-08 08:41:04",
  "2015-03-04 05:30:31",
  "2015-01-12 02:07:07",
  "2015-02-05 08:01:17",
  "2015-03-01 04:52:55"
)

# End of each interval, in the same text format.
Lasttime <- c(
  "2015-01-13 08:14:39",
  "2015-02-06 09:31:51",
  "2015-02-24 00:50:53",
  "2015-02-02 05:55:20",
  "2015-02-26 03:47:42",
  "2015-03-16 05:42:58",
  "2015-01-24 01:30:23",
  "2015-02-17 05:43:36",
  "2015-03-13 10:04:23"
)

# Outcome per interval; NA marks the entries that still have to be filled in.
eventOutcome <- c(
  "Outcome_D", NA, "Outcome_A",
  NA, NA, "Outcome_A",
  "Outcome_B", "Outcome_B", NA
)
# Build the interval-level data frame and convert the two boundary columns
# from text to date-times with lubridate::ymd_hms().
df2 <- data.frame(UserId, Firstime, Lasttime, eventOutcome)
df2$Firstime <- lubridate::ymd_hms(df2$Firstime)
df2$Lasttime <- lubridate::ymd_hms(df2$Lasttime)
head(df2)
# The column-header line below is printed output, not code, so it must be
# commented out like the rows that follow it.
# UserId Firstime Lasttime eventOutcome
#1 n4RhHC 2015-01-01 03:11:09 2015-01-13 08:14:39 Outcome_D
#2 n4RhHC 2015-01-19 04:33:37 2015-02-06 09:31:51 <NA>
#3 n4RhHC 2015-02-18 08:47:15 2015-02-24 00:50:53 Outcome_A
#4 nPUYOw 2015-01-21 11:12:59 2015-02-02 05:55:20 <NA>
#5 nPUYOw 2015-02-08 08:41:04 2015-02-26 03:47:42 <NA>
#6 nPUYOw 2015-03-04 05:30:31 2015-03-16 05:42:58 Outcome_A
如您所见,df2 中有 NA 条目,我想用 df 中的数据来填充它们。如何根据 UserId 以及 Lasttime 列中的时间(该时间与 df 的 eventDateTime 中的某个时间条目相匹配),用 df 中相应的 eventOutcome 填充 df2 中的 NA 字段?
例如,df2 中的第一个 NA 字段将替换为“Outcome_A”,第二个 NA 字段将替换为“Outcome_C”等等。我希望这是有道理的。
解决方案
推荐阅读
- r - shinydashboardPlus 中是否有一个带有标签的框?
- bootstrap-4 - Bootstrap 4:如何设置特定响应方块的网格 - Col 或 Card?
- c# - 后面带标点的单词怎么找?
- salesforce - 使用 Docusign 发送 Salesforce 附件
- c# - 对象具有附加属性 ActionContext 等
- android - 存储访问框架 - 无法在使用 ACTION_OPEN_DOCUMENT_TREE (NextCloud) 选择的云文件夹中创建文件
- android - 从应用程序收到错误 -32000:无法在引擎中运行配置。#Flutter
- amazon-web-services - AWS 网络负载均衡器无法获取客户端 IP
- python - 最后 10 行的列中的最大值
- mysql - MySQL如何创建一个用日期格式化的自定义id列?