r - 将多个列 ID 折叠成一个 ID 列
问题描述
我正在尝试合并一些数据。我有一些包含 ID 的数据。我想将每一列合并到一个新列finalID
中,它将所有其他列折叠成一列。
对于许多列 ID,它们将是相同的,但是一两列可能不同。如果它们不同,那么我希望它可以将第二个 ID 添加到新列finalID_WithDoubleResults
中。
我怎样才能首先将列折叠成单个 ID 列然后,如果列包含不同的 ID 将它们拆分为两个或更多新列。
数据:
structure(list(sabiUniqueID.x = c(74L, 82L, 129L, 153L, 179L,
203L, 211L, 220L, 223L, 231L, 237L, 243L, 259L, 260L, 275L, 280L,
290L, 292L, 297L, 300L), sabiUniqueID.y = c(74L, 82L, 129L, NA,
NA, NA, 211L, NA, 223L, 231L, 237L, NA, NA, NA, NA, NA, 290L,
NA, 297L, NA), sabiUniqueID.x.x = c(74L, 82L, 129L, 153L, NA,
NA, 211L, NA, 223L, 231L, 237L, NA, NA, 260L, 275L, NA, 290L,
292L, 297L, NA), sabiUniqueID.y.y = c(NA, NA, 129L, NA, 179L,
203L, 211L, NA, 223L, NA, NA, NA, 259L, 260L, 275L, NA, 290L,
292L, NA, NA), sabiUniqueID.x.x.x = c(NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 292L, NA, NA), sabiUniqueID.y.y.y = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 260L, 275L, NA,
290L, NA, NA, NA), sabiUniqueID.x.x.x.x = c(74L, 82L, NA, NA,
NA, NA, NA, NA, NA, NA, NA, 243L, NA, NA, NA, NA, NA, 292L, NA,
NA), sabiUniqueID.y.y.y.y = c(74L, 82L, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, 260L, 275L, NA, 290L, NA, NA, NA), sabiUniqueID.x.x.x.x.x = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 290L,
NA, NA, 300L), sabiUniqueID.y.y.y.y.y = c(74L, 82L, 129L, NA,
179L, 203L, NA, NA, 223L, NA, NA, 243L, NA, 260L, NA, NA, 290L,
292L, NA, 300L), sabiUniqueID.x.x.x.x.x.x = c(74L, 82L, 129L,
153L, 179L, 203L, 211L, 220L, 223L, 231L, 237L, 243L, 259L, 260L,
275L, 280L, 290L, 292L, 297L, 300L), sabiUniqueID.y.y.y.y.y.y = c(NA,
82L, NA, NA, 179L, 203L, NA, NA, NA, NA, NA, 243L, 259L, 260L,
NA, 280L, 290L, 292L, NA, 300L), sabiUniqueID = c(NA, 82L, NA,
NA, 179L, 203L, NA, NA, NA, NA, NA, 243L, 259L, 260L, NA, 280L,
290L, 292L, NA, 300L)), row.names = c(NA, -20L), groups = structure(list(
sabiUniqueID.x = c(74L, 82L, 129L, 153L, 179L, 203L, 211L,
220L, 223L, 231L, 237L, 243L, 259L, 260L, 275L, 280L, 290L,
292L, 297L, 300L), sabiUniqueID.y = c(74L, 82L, 129L, NA,
NA, NA, 211L, NA, 223L, 231L, 237L, NA, NA, NA, NA, NA, 290L,
NA, 297L, NA), sabiUniqueID.x.x = c(74L, 82L, 129L, 153L,
NA, NA, 211L, NA, 223L, 231L, 237L, NA, NA, 260L, 275L, NA,
290L, 292L, 297L, NA), sabiUniqueID.y.y = c(NA, NA, 129L,
NA, 179L, 203L, 211L, NA, 223L, NA, NA, NA, 259L, 260L, 275L,
NA, 290L, 292L, NA, NA), sabiUniqueID.x.x.x = c(NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 292L,
NA, NA), sabiUniqueID.y.y.y = c(NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, 260L, 275L, NA, 290L, NA, NA, NA),
sabiUniqueID.x.x.x.x = c(74L, 82L, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 243L, NA, NA, NA, NA, NA, 292L, NA, NA), sabiUniqueID.y.y.y.y = c(74L,
82L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 260L, 275L,
NA, 290L, NA, NA, NA), sabiUniqueID.x.x.x.x.x = c(NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 290L,
NA, NA, 300L), sabiUniqueID.y.y.y.y.y = c(74L, 82L, 129L,
NA, 179L, 203L, NA, NA, 223L, NA, NA, 243L, NA, 260L, NA,
NA, 290L, 292L, NA, 300L), sabiUniqueID.x.x.x.x.x.x = c(74L,
82L, 129L, 153L, 179L, 203L, 211L, 220L, 223L, 231L, 237L,
243L, 259L, 260L, 275L, 280L, 290L, 292L, 297L, 300L), sabiUniqueID.y.y.y.y.y.y = c(NA,
82L, NA, NA, 179L, 203L, NA, NA, NA, NA, NA, 243L, 259L,
260L, NA, 280L, 290L, 292L, NA, 300L), .rows = structure(list(
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L,
14L, 15L, 16L, 17L, 18L, 19L, 20L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr"))), row.names = c(NA, 20L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
编辑:
较小的数据
structure(list(sabiUniqueID.x = c(NA, NA, NA, NA, 820L), sabiUniqueID.y = c(1301L,
NA, NA, NA, 820L), sabiUniqueID.x.x = c(9999L, NA, 1176L, NA,
820L), sabiUniqueID.y.y = c(NA, NA, NA, 1537L, 820L), sabiUniqueID.x.x.x = c(NA,
NA, NA, NA, 820L)), row.names = c(NA, -5L), class = c("tbl_df",
"tbl", "data.frame"))
看起来像:
sabiUniqueID.x sabiUniqueID.y sabiUniqueID.x.x sabiUniqueID.y.y sabiUniqueID.x.x.x
<int> <int> <int> <int> <int>
1 NA 1301 9999 NA NA
2 NA NA NA NA NA
3 NA NA 1176 NA NA
4 NA NA NA 1537 NA
5 820 820 820 820 820
现在我在哪里添加了结果9999
,它有两个用于这个观察的 ID(其他观察有重复的 ID)。
所以预期的输出可能如下所示:
finalID finalID_WithDoubleResults
1301 99999
NA NA
1176 NA
1537 NA
820 NA
编辑2:
df %>%
pivot_longer(cols = c(sabiUniqueID.x:sabiUniqueID.x.x.x), names_to = "id_type", values_to = "id_
value") %>%
group_by(id_type) %>%
arrange(desc(id_value)) %>%
distinct() %>%
#select(-c(id_type)) %>%
mutate(rowID = row_number()) %>%
pivot_wider(names_from = id_type, values_from = id_value)
解决方案
推荐阅读
- java - 如何用我自己创建的键盘替换普通的 InputMethod/Keyboard?
- c# - 当 Selenium ChromeDriver 的 URL 改变时触发一个事件
- visual-studio - 亚音速 ORM 发生器输出中文
- airflow - 使用代码,如何更新气流变量?
- android - 尝试使用 SQlite 创建表时出错
- delphi - 自定义控制感应加速字符
- azure - 我可以将现有的 Azure Redis 缓存从传统的经典 Azure 订阅移动到 CSP 吗?
- runtime - 什么是运行时单个循环,但它运行了 n 次?
- spring - 在 Spring SFTP 上设置首选身份验证方法
- c# - 启用 Windows 2008 R2 以接受对枚举 SslProtocols 3072 的强制转换