首页 > 解决方案 > 将 3 列的 df 变成 6 列(NA 的特殊情况)

问题描述

我有以下数据框:

数据

# Example input: one ID per row; a cell may hold several items joined by "/",
# and col3 is missing (NA) for the last ID.
df <- data.frame(
  ID   = c("foo", "bar", "cake"),
  col1 = c("1a/1b", "1a", "1b"),
  col2 = c("2b", "2a", "2b"),
  col3 = c("3b", "3b/3a", NA)
)
    ID  col1 col2  col3
1  foo 1a/1b   2b    3b
2  bar    1a   2a 3b/3a
3 cake    1b   2b  <NA>

我想把它变成 6 列,列名分别为 1a、1b、2a、2b、3a、3b。

这是我目前的代码(注意:数据中有一些 NA 值,我希望在结果中仍保留为 NA):

library(dplyr)
library(tidyr)
# First attempt: build one indicator column per distinct item.
# The NA in col3 is kept as a value here, so it ends up as its own `NA`
# column, and ID/item combinations that never occur are filled with 0
# (which is what the printed result below shows).
df %>%
  # Long format: one row per ID / original column pair
  pivot_longer(cols = -ID) %>%
  # Split "1a/1b"-style cells into one row per item
  separate_rows(value, sep = "/") %>%
  # Presence flag that becomes the cell value after widening
  mutate(i1 = 1) %>%
  # The original column name is not needed for the wide result
  select(-name) %>%
  # One column per item; combinations without a row are filled with 0
  pivot_wider(
    names_from = value,
    values_from = i1,
    values_fill = list(i1 = 0)
  )
# A tibble: 3 x 8
  ID     `1a`  `1b`  `2b`  `3b`  `2a`  `3a`  `NA`
  <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 foo       1     1     1     1     0     0     0
2 bar       1     0     0     1     1     1     0
3 cake      0     1     1     0     0     0     1

期望的输出

  ID     `1a`  `1b`  `2b`  `3b`  `2a`  `3a`
  <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 foo       1     1     1     1     0     0
2 bar       1     0     0     1     1     1
3 cake      0     1     1    NA     0    NA

标签: r dplyr tidyr

解决方案


这是一种使用 dplyr 和 tidyr 的方法:

library(dplyr)
library(tidyr)

df %>%
  # Reshape to one row per ID / column pair, discarding cells that are NA
  pivot_longer(cols = -ID, values_drop_na = TRUE) %>%
  # Break "a/b" cells apart so every item sits on its own row
  separate_rows(value, sep = "/") %>%
  # Flag every observed ID / item pair with 1
  mutate(i1 = 1) %>%
  # Add the ID / item pairs that never occurred; their flag is NA for now
  complete(ID, value = unique(value)) %>%
  # Group by ID and by the numeric part of the item (e.g. "3a" -> 3)
  group_by(ID, num = readr::parse_number(value)) %>%
  # In a group with at least one observed item, an unobserved item becomes 0;
  # a group with nothing observed keeps its NAs
  mutate(i1 = ifelse(is.na(i1) & !all(is.na(i1)), 0, i1)) %>%
  ungroup() %>%
  # Remove the helper columns before reshaping
  select(-c(num, name)) %>%
  # Spread the items out into one flag column each
  pivot_wider(names_from = value, values_from = i1)

# A tibble: 3 x 7
#  ID     `1a`  `1b`  `2a`  `2b`  `3a`  `3b`
#  <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 bar       1     0     1     0     1     1
#2 cake      0     1     0     1    NA    NA
#3 foo       1     1     0     1     0     1

推荐阅读