首页 > 解决方案 > 如何在一个条件下组合 2 个数据帧?

问题描述

我有 2 个数据框。

df1 -

     T1    T2    T3    T4
ID1  0     1.3   -1.5   0
ID2  0.05  0.3    0    -0.004

df2 -

     Value1    Value2    Value3    Value4    
T1   0         0         1          0    
T2   0         1         0          0 
T3   1         0         0          1 
T4   0         1         1          1 

现在,我想要以下结果:

     Value1    Value2    Value3    Value4 
ID1  1          1         0         1
ID2  0          2         2         1 

在最终结果中,我想合并 df1 和 df2。

例如,在 df1,ID1 行中,我们的第一个值为零,因此我们将忽略 df2 中的 T1。我们将跳到 d1 中的下一个值 1.3,由于该值不等于 0,我们将从 df2 获取 T2 值并将其放入输出表中。对于每个单元格也是如此。

标签: rdataframemerge

解决方案


请参阅下面的数据。这里所有解决方案都需要的一件事是行名位于数据列中,因此从以下开始:

df1$ID <- rownames(df1)
df2$Tnum <- rownames(df2)

碱基R

# library(reshape2) # melt, dcast
df1m <- reshape2::melt(df1, id="ID", variable.name = "Tnum")
df2m <- reshape2::melt(df2, id="Tnum")
dfcomb <- merge(subset(df1m, abs(value) > 0), df2m, by = "Tnum", all = TRUE)
dfcomb2 <- aggregate(dfcomb$value.y, by = dfcomb[c("ID", "variable")], FUN = sum)
reshape2::dcast(dfcomb2, ID ~ variable)
# Using x as value column: use value.var to override.
#    ID Value1 Value2 Value3 Value4
# 1 ID1      1      1      0      1
# 2 ID2      0      2      2      1

tidyverse

library(dplyr)
library(tidyr) # pivot_longer, pivot_wider
left_join(
  pivot_longer(df1, -ID, names_to = "Tnum"),
  pivot_longer(df2, -Tnum),
  by = "Tnum"
) %>%
  filter(abs(value.x) > 0) %>%
  group_by(ID, name) %>%
  summarize(value = sum(value.y), .groups = "drop") %>%
  pivot_wider(ID)
# # A tibble: 2 x 5
#   ID    Value1 Value2 Value3 Value4
#   <chr>  <int>  <int>  <int>  <int>
# 1 ID1        1      1      0      1
# 2 ID2        0      2      2      1

数据表

library(data.table)
tmp <- merge(
  melt(DT1, id.vars = "ID", variable.name = "Tnum")[ abs(value) > 0 ],
  melt(DT2, id.vars = "Tnum"),
  by = "Tnum", allow.cartesian = TRUE
)[, .(value = sum(value.y)), by = .(ID, variable) ]
dcast(tmp, ID ~ variable)
#        ID Value1 Value2 Value3 Value4
#    <char>  <int>  <int>  <int>  <int>
# 1:    ID1      1      1      0      1
# 2:    ID2      0      2      2      1

数据

df1 <- structure(list(T1 = c(0, 0.05), T2 = c(1.3, 0.3), T3 = c(-1.5, 0), T4 = c(0, -0.004)), class = "data.frame", row.names = c("ID1", "ID2"))
df2 <- structure(list(Value1 = c(0L, 0L, 1L, 0L), Value2 = c(0L, 1L, 0L, 1L), Value3 = c(1L, 0L, 0L, 1L), Value4 = c(0L, 0L, 1L, 1L)), class = "data.frame", row.names = c("T1", "T2", "T3", "T4"))

推荐阅读