首页 > 解决方案 > 将多个列 ID 折叠成一个 ID 列

问题描述

我正在尝试合并一些数据。我有一些包含 ID 的数据。我想将每一列合并到一个新列finalID中,它将所有其他列折叠成一列。

对于许多列 ID,它们将是相同的,但是一两列可能不同。如果它们不同,那么我希望它可以将第二个 ID 添加到新列finalID_WithDoubleResults中。

我怎样才能首先将列折叠成单个 ID 列然后,如果列包含不同的 ID 将它们拆分为两个或更多新列。

数据:

structure(list(sabiUniqueID.x = c(74L, 82L, 129L, 153L, 179L, 
203L, 211L, 220L, 223L, 231L, 237L, 243L, 259L, 260L, 275L, 280L, 
290L, 292L, 297L, 300L), sabiUniqueID.y = c(74L, 82L, 129L, NA, 
NA, NA, 211L, NA, 223L, 231L, 237L, NA, NA, NA, NA, NA, 290L, 
NA, 297L, NA), sabiUniqueID.x.x = c(74L, 82L, 129L, 153L, NA, 
NA, 211L, NA, 223L, 231L, 237L, NA, NA, 260L, 275L, NA, 290L, 
292L, 297L, NA), sabiUniqueID.y.y = c(NA, NA, 129L, NA, 179L, 
203L, 211L, NA, 223L, NA, NA, NA, 259L, 260L, 275L, NA, 290L, 
292L, NA, NA), sabiUniqueID.x.x.x = c(NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 292L, NA, NA), sabiUniqueID.y.y.y = c(NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 260L, 275L, NA, 
290L, NA, NA, NA), sabiUniqueID.x.x.x.x = c(74L, 82L, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, 243L, NA, NA, NA, NA, NA, 292L, NA, 
NA), sabiUniqueID.y.y.y.y = c(74L, 82L, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, 260L, 275L, NA, 290L, NA, NA, NA), sabiUniqueID.x.x.x.x.x = c(NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 290L, 
NA, NA, 300L), sabiUniqueID.y.y.y.y.y = c(74L, 82L, 129L, NA, 
179L, 203L, NA, NA, 223L, NA, NA, 243L, NA, 260L, NA, NA, 290L, 
292L, NA, 300L), sabiUniqueID.x.x.x.x.x.x = c(74L, 82L, 129L, 
153L, 179L, 203L, 211L, 220L, 223L, 231L, 237L, 243L, 259L, 260L, 
275L, 280L, 290L, 292L, 297L, 300L), sabiUniqueID.y.y.y.y.y.y = c(NA, 
82L, NA, NA, 179L, 203L, NA, NA, NA, NA, NA, 243L, 259L, 260L, 
NA, 280L, 290L, 292L, NA, 300L), sabiUniqueID = c(NA, 82L, NA, 
NA, 179L, 203L, NA, NA, NA, NA, NA, 243L, 259L, 260L, NA, 280L, 
290L, 292L, NA, 300L)), row.names = c(NA, -20L), groups = structure(list(
    sabiUniqueID.x = c(74L, 82L, 129L, 153L, 179L, 203L, 211L, 
    220L, 223L, 231L, 237L, 243L, 259L, 260L, 275L, 280L, 290L, 
    292L, 297L, 300L), sabiUniqueID.y = c(74L, 82L, 129L, NA, 
    NA, NA, 211L, NA, 223L, 231L, 237L, NA, NA, NA, NA, NA, 290L, 
    NA, 297L, NA), sabiUniqueID.x.x = c(74L, 82L, 129L, 153L, 
    NA, NA, 211L, NA, 223L, 231L, 237L, NA, NA, 260L, 275L, NA, 
    290L, 292L, 297L, NA), sabiUniqueID.y.y = c(NA, NA, 129L, 
    NA, 179L, 203L, 211L, NA, 223L, NA, NA, NA, 259L, 260L, 275L, 
    NA, 290L, 292L, NA, NA), sabiUniqueID.x.x.x = c(NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 292L, 
    NA, NA), sabiUniqueID.y.y.y = c(NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, 260L, 275L, NA, 290L, NA, NA, NA), 
    sabiUniqueID.x.x.x.x = c(74L, 82L, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, 243L, NA, NA, NA, NA, NA, 292L, NA, NA), sabiUniqueID.y.y.y.y = c(74L, 
    82L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 260L, 275L, 
    NA, 290L, NA, NA, NA), sabiUniqueID.x.x.x.x.x = c(NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 290L, 
    NA, NA, 300L), sabiUniqueID.y.y.y.y.y = c(74L, 82L, 129L, 
    NA, 179L, 203L, NA, NA, 223L, NA, NA, 243L, NA, 260L, NA, 
    NA, 290L, 292L, NA, 300L), sabiUniqueID.x.x.x.x.x.x = c(74L, 
    82L, 129L, 153L, 179L, 203L, 211L, 220L, 223L, 231L, 237L, 
    243L, 259L, 260L, 275L, 280L, 290L, 292L, 297L, 300L), sabiUniqueID.y.y.y.y.y.y = c(NA, 
    82L, NA, NA, 179L, 203L, NA, NA, NA, NA, NA, 243L, 259L, 
    260L, NA, 280L, 290L, 292L, NA, 300L), .rows = structure(list(
        1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 
        14L, 15L, 16L, 17L, 18L, 19L, 20L), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr"))), row.names = c(NA, 20L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"))

编辑:

较小的数据

structure(list(sabiUniqueID.x = c(NA, NA, NA, NA, 820L), sabiUniqueID.y = c(1301L, 
                                                                            NA, NA, NA, 820L), sabiUniqueID.x.x = c(9999L, NA, 1176L, NA, 
                                                                                                                    820L), sabiUniqueID.y.y = c(NA, NA, NA, 1537L, 820L), sabiUniqueID.x.x.x = c(NA, 
                                                                                                                                                                                                 NA, NA, NA, 820L)), row.names = c(NA, -5L), class = c("tbl_df", 
                                                                                                                                                                                                                                                       "tbl", "data.frame"))

看起来像:

    sabiUniqueID.x sabiUniqueID.y sabiUniqueID.x.x sabiUniqueID.y.y sabiUniqueID.x.x.x
           <int>          <int>            <int>            <int>              <int>
1             NA           1301             9999               NA                 NA
2             NA             NA               NA               NA                 NA
3             NA             NA             1176               NA                 NA
4             NA             NA               NA             1537                 NA
5            820            820              820              820                820

现在我在哪里添加了结果9999,它有两个用于这个观察的 ID(其他观察有重复的 ID)。

所以预期的输出可能如下所示:

finalID  finalID_WithDoubleResults
1301         99999
NA            NA
1176          NA
1537          NA
820           NA

编辑2:

df %>% 
  pivot_longer(cols = c(sabiUniqueID.x:sabiUniqueID.x.x.x), names_to = "id_type", values_to = "id_

value") %>% 
  group_by(id_type) %>% 
  arrange(desc(id_value)) %>% 
  distinct() %>% 
  #select(-c(id_type)) %>% 
  mutate(rowID = row_number()) %>% 
  pivot_wider(names_from = id_type, values_from = id_value)

标签: r

解决方案


推荐阅读