首页 > 解决方案 > 在数据框中添加特定行

问题描述

我正在尝试将数据框的特定行添加在一起。

并且没有使用 grepl 查找行然后将它们绑定到底部,我不确定是否有更好的方法来做到这一点。

这是我的输入df:

input = structure(list(
V1 = c("Sales", "Sales", "Sales", "Sales", "Sales","Sales"),
V2 = c("Johnny", "Meg", "Fred", "Johnny", "Meg", "Fred"),
V3 = c("Australia", "Australia", "Australia", "NZ", "NZ","NZ"), 
V4 = c(154L, 1898L, 175L, 1235L, 23L, 255L)), row.names = c(NA,6L),
 class = "data.frame")

这是我的预期输出:

structure(list(
V1 = c("Sales", "Sales", "Sales", "Sales", "Sales", 
"Sales", "Sales", "Sales", "Sales", "Sales", "Sales", "Sales"), 
V2 = c("Johnny", "Meg", "Fred", "Johnny", "Meg", "Fred", "Johnny + Fred", 
"Meg + Fred", "Johnny + Meg + Fred", "Johnny + Fred", "Meg + Fred", 
"Johnny + Meg + Fred"), 
V3 = c("Australia", "Australia", "Australia", "NZ",
 "NZ", "NZ", "Australia", "Australia", "Australia", "NZ", "NZ", "NZ"), 
V4 = c(154L, 1898L, 175L, 1235L, 23L, 255L, 329L, 2073L, 2227L, 1490L, 278L, 1513L)),
 class = "data.frame", row.names = c(NA, -12L)
)

我会认为有更好的方法来添加这些行过滤然后添加,然后加入等。

谁能指出我应该寻找的正确方向?

标签: rlistdataframestructure

解决方案


I solve the problem using combn

Data input part

input = structure(list(
  V1 = c("Sales", "Sales", "Sales", "Sales", "Sales","Sales"),
  V2 = c("Johnny", "Meg", "Fred", "Johnny", "Meg", "Fred"),
  V3 = c("Australia", "Australia", "Australia", "NZ", "NZ","NZ"), 
  V4 = c(154L, 1898L, 175L, 1235L, 23L, 255L)), row.names = c(NA,6L),
  class = "data.frame")


structure(list(
  V1 = c("Sales", "Sales", "Sales", "Sales", "Sales", 
         "Sales", "Sales", "Sales", "Sales", "Sales", "Sales", "Sales"), 
  V2 = c("Johnny", "Meg", "Fred", "Johnny", "Meg", "Fred", "Johnny + Fred", 
         "Meg + Fred", "Johnny + Meg + Fred", "Johnny + Fred", "Meg + Fred", 
         "Johnny + Meg + Fred"), 
  V3 = c("Australia", "Australia", "Australia", "NZ",
         "NZ", "NZ", "Australia", "Australia", "Australia", "NZ", "NZ", "NZ"), 
  V4 = c(154L, 1898L, 175L, 1235L, 23L, 255L, 329L, 2073L, 2227L, 1490L, 278L, 1513L)),
  class = "data.frame", row.names = c(NA, -12L)
)

Solution

library(dplyr)

TT = unique(input$V2)
> TT
[1] "Johnny" "Meg"    "Fred"  

comb2 = combn(TT,2,simplify = FALSE)
> comb2

 [[1]]
[1] "Johnny" "Meg"   

[[2]]
[1] "Johnny" "Fred"  

[[3]]
[1] "Meg"  "Fred"

comb3 = combn(TT,3,simplify = FALSE)
> comb3
 [[1]]
[1] "Johnny" "Meg"    "Fred"  

result = function(data){
  purrr::map_df(lapply(data,function(x){paste(x,collapse = '|')}), function(x){
    df = input[grepl(x,input$V2),] %>% group_by(V3)%>%summarize(V1= 'Sales',
                                                                V2= paste(V2,collapse = '+'),
                                                                V4= sum(V4))
    return(df)
  }
  )
}

Result

result(comb2)
# A tibble: 6 x 4
  V3        V1    V2             V4
  <chr>     <chr> <chr>       <int>
1 Australia Sales Johnny+Meg   2052
2 NZ        Sales Johnny+Meg   1258
3 Australia Sales Johnny+Fred   329
4 NZ        Sales Johnny+Fred  1490
5 Australia Sales Meg+Fred     2073
6 NZ        Sales Meg+Fred      278

result(comb3)
# A tibble: 2 x 4
  V3        V1    V2                 V4
  <chr>     <chr> <chr>           <int>
1 Australia Sales Johnny+Meg+Fred  2227
2 NZ        Sales Johnny+Meg+Fred  1513


finalResult = bind_rows(A,B,input) %>%
  select(V1,V2,V3,V4) %>% filter(! V2 %in% c('Johnny+Meg'))

> finalResult 
# A tibble: 12 x 4
   V1    V2              V3           V4
   <chr> <chr>           <chr>     <int>
 1 Sales Johnny+Fred     Australia   329
 2 Sales Johnny+Fred     NZ         1490
 3 Sales Meg+Fred        Australia  2073
 4 Sales Meg+Fred        NZ          278
 5 Sales Johnny+Meg+Fred Australia  2227
 6 Sales Johnny+Meg+Fred NZ         1513
 7 Sales Johnny          Australia   154
 8 Sales Meg             Australia  1898
 9 Sales Fred            Australia   175
10 Sales Johnny          NZ         1235
11 Sales Meg             NZ           23
12 Sales Fred            NZ          255

推荐阅读