首页 > 解决方案 > 在 R 中使用 dplyr 高效地重塑数据框

问题描述

我有一个这样的数据框。

# Sample data: five ids, each with a comma-separated list of items.
id <- letters[1:5]
items <- c(
  "A,B,C,D,E",
  "C,D,E,A,B",
  "E,D,C",
  "B,A",
  "A"
)
dat <- tibble(id = id, items = items)

> dat
# A tibble: 5 x 2
  id    items    
  <chr> <chr>    
1 a     A,B,C,D,E
2 b     C,D,E,A,B
3 c     E,D,C    
4 d     B,A      
5 e     A     

我想把 items 拆分成若干部分,并用其他变量(变量 A 到变量 E)来代替它。

我希望 dat 变成这样的格式:

# Target layout: one 0/1 indicator column per item, one row per id.
final.dat <- tibble(
  id = id,
  A = c(1, 1, 0, 1, 1),
  B = c(1, 1, 0, 1, 0),
  C = c(1, 1, 1, 0, 0),
  D = c(1, 1, 1, 0, 0),
  E = c(1, 1, 1, 0, 0)
)

> final.dat
# A tibble: 5 x 6
  id        A     B     C     D     E
  <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 a         1     1     1     1     1
2 b         1     1     1     1     1
3 c         0     0     1     1     1
4 d         1     1     0     0     0
5 e         1     0     0     0     0

这是我的代码,但我觉得它过于繁琐。

而且我的代码中还有一个 BUG:当我用 map(as.data.frame) 替换 map(as_tibble) 时,所有的值都变成了 NA。

有没有更有效的方法来做到这一点?

任何帮助将不胜感激!

# First column of `dat`: the ids.
id <- dat[, 1]

# Second column of `dat`: the comma-separated items to reshape.
items <- dat[, 2]

# Promote the first row of `data` to column names and flag each column with 1.
#
# The values in the first row become the column names, a row holding 1 in
# every column is appended, and the original first row is dropped.
#
# Args:
#   data: a data frame or tibble with at least one row.
# Returns:
#   `data` with renamed columns, without its first row, and with one trailing
#   row of 1s (stored as "1" when the columns are character).
make.title <- function(data) {
  stopifnot(is.data.frame(data), nrow(data) >= 1L)

  # Factor columns (what as.data.frame() builds from a character matrix when
  # stringsAsFactors is TRUE, the default before R 4.0) break both steps
  # below: unlist() returns the integer codes instead of the labels, and
  # rbind() inserts NA because 1 is not an existing level. Work on plain
  # character vectors instead.
  for (j in which(vapply(data, is.factor, logical(1)))) {
    data[[j]] <- as.character(data[[j]])
  }

  row.1 <- unlist(data[1, , drop = FALSE])
  colnames(data) <- row.1
  data <- rbind(data, rep(1, ncol(data)))
  data <- data[-1, , drop = FALSE]
  # Negative indexing keeps the old row label; reset to the default numbering.
  rownames(data) <- NULL
  data
}

# Build the wanted table. Each cell of `items` is split on commas, laid out
# as a one-row tibble and passed through make.title(); the per-row results
# are then stacked, the NA cells are replaced with 0, and the id column is
# attached.
final.dat.2 <-
  split(items, seq(nrow(items))) %>%
  map(function(cell) {
    tokens <- unlist(str_split(unlist(cell), pattern = ','))
    make.title(as_tibble(rbind(tokens, deparse.level = 0)))
  }) %>%
  bind_rows() %>%
  transmute(across(.cols = everything(), ~replace_na(., 0))) %>%
  bind_cols(id)

# Same pipeline, but each row is converted with as.data.frame() instead of
# as_tibble(). This is the variant for which the asker reports that every
# value ends up NA.
# NOTE(review): presumably because as.data.frame() produced factor columns
# (stringsAsFactors default on R < 4.0) -- confirm against the R version used.
final.dat.3 <-
  split(items, seq(nrow(items))) %>%
  map(function(cell) {
    tokens <- unlist(str_split(unlist(cell), pattern = ','))
    make.title(as.data.frame(rbind(tokens, deparse.level = 0)))
  }) %>%
  bind_rows() %>%
  transmute(across(.cols = everything(), ~replace_na(., 0))) %>%
  bind_cols(id)

标签: r dataframe dplyr tidyverse

解决方案


试试这个。您可以使用 tidyverse 中的 separate_rows() 和 pivot_wider() 来得到预期的输出:

library(dplyr)
library(tidyr)
# Reshape `dat` into one 0/1 indicator column per item:
#   1. split the comma-separated `items` into one row per (id, item) pair;
#   2. drop repeated pairs, so an item listed twice for the same id cannot
#      give pivot_wider() duplicate keys (which would yield list-columns and
#      a warning instead of plain 0/1 values);
#   3. mark every remaining pair with 1 and spread the items into columns,
#      filling the absent combinations with 0.
newdf <- dat %>%
  separate_rows(items, sep = ',') %>%
  distinct() %>%
  mutate(Val = 1) %>%
  pivot_wider(names_from = items, values_from = Val, values_fill = 0)

输出:

# A tibble: 5 x 6
  id        A     B     C     D     E
  <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 a         1     1     1     1     1
2 b         1     1     1     1     1
3 c         0     0     1     1     1
4 d         1     1     0     0     0
5 e         1     0     0     0     0

推荐阅读