r - 我无法根据字符扩展行
问题描述
我有这样的数据
# Example input as a dput() literal: a 6-row data.frame with one row per Gene.
# UniID and Refseq_IDs are factors whose labels can hold several IDs joined by ":".
# Note that the Orthology labels (e.g. "Mouse:Abi3bp|") also contain ":", so
# splitting every column on ":" would affect that column as well.
df <- structure(list(Division = structure(c(1L, 1L, 1L, 2L, 2L, 3L), .Label = c("Main data",
"Second data", "Third data"), class = "factor"), Gene = structure(1:6, .Label = c("ABI3BP",
"ADIPOQ", "AEBP1", "AGRN", "AMBN", "AMELX"), class = "factor"),
IDs = c(17265L, 13633L, 303L, 329L, 452L, 461L), IDs.Links = c(17265L,
13633L, 303L, 329L, 452L, 461L), UniID = structure(c(1L,
4L, 2L, 3L, 6L, 5L), .Label = c("B4DSV9:D3YTG3:E9PPR9:E9PRB5:H0Y897",
"C9JLQ8:H7C0W8:H7C1J5", "H0Y5U1:O00468", "Q15848", "Q99217",
"Q9NP70"), class = "factor"), Refseq_IDs = structure(c(4L,
3L, 1L, 6L, 5L, 2L), .Label = c("NP_001120.3", "NP_001133.1:NP_872621.1:NP_872622.1",
"NP_001171271.1:NP_004788.1", "NP_056244.2:XP_005247340.1",
"NP_057603.1", "NP_940978.2:XP_005244806.1:XP_006710696.1"
), class = "factor"), Orthology = structure(1:6, .Label = c("Mouse:Abi3bp|",
"Mouse:Adipoq|", "Mouse:Aebp1|", "Mouse:Agrn|", "Mouse:Ambn|",
"Mouse:Amelx|"), class = "factor")), class = "data.frame", row.names = c(NA,
-6L))
在名为 UniID 的列中,有许多由 ":" 分隔的字符串。我想把其中每一个字符串放到单独的新行中,并重复其他列的值。
期望的输出如下所示
# Desired output as a dput() literal: 13 rows, one per individual UniID value,
# with the remaining columns repeated for each split value.
# NOTE(review): this literal is not an exact expansion of `df` and appears to
# have been edited by hand -- the Gene codes for rows 7-9 map to "AEBP1",
# "AEBP2", "AEBP3" (the last two labels do not exist in `df`), and row 11 uses a
# Refseq_IDs label ending in "XP_006710696.2" that `df` does not contain.
df2 <-structure(list(Division = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 3L), .Label = c("Main data", "Second data",
"Third data"), class = "factor"), Gene = structure(c(1L, 1L,
1L, 1L, 1L, 2L, 3L, 4L, 5L, 6L, 6L, 7L, 8L), .Label = c("ABI3BP",
"ADIPOQ", "AEBP1", "AEBP2", "AEBP3", "AGRN", "AMBN", "AMELX"), class = "factor"),
IDs = c(17265L, 17265L, 17265L, 17265L, 17265L, 13633L, 303L,
303L, 303L, 329L, 329L, 452L, 461L), IDs.Links = c(17265L,
17265L, 17265L, 17265L, 17265L, 13633L, 303L, 303L, 303L,
329L, 329L, 452L, 461L), UniID = structure(c(1L, 3L, 4L,
5L, 7L, 11L, 2L, 8L, 9L, 6L, 10L, 13L, 12L), .Label = c("B4DSV9",
"C9JLQ8", "D3YTG3", "E9PPR9", "E9PRB5", "H0Y5U1", "H0Y897",
"H7C0W8", "H7C1J5", "O00468", "Q15848", "Q99217", "Q9NP70"
), class = "factor"), Refseq_IDs = structure(c(4L, 4L, 4L,
4L, 4L, 3L, 1L, 1L, 1L, 6L, 7L, 5L, 2L), .Label = c("NP_001120.3",
"NP_001133.1:NP_872621.1:NP_872622.1", "NP_001171271.1:NP_004788.1",
"NP_056244.2:XP_005247340.1", "NP_057603.1", "NP_940978.2:XP_005244806.1:XP_006710696.1",
"NP_940978.2:XP_005244806.1:XP_006710696.2"), class = "factor"),
Orthology = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 3L, 3L, 3L,
4L, 4L, 5L, 6L), .Label = c("Mouse:Abi3bp|", "Mouse:Adipoq|",
"Mouse:Aebp1|", "Mouse:Agrn|", "Mouse:Ambn|", "Mouse:Amelx|"
), class = "factor")), class = "data.frame", row.names = c(NA,
-13L))
我找到了其他帖子,我试图这样做,但没有任何成功。
# Split each UniID string on ":"; `s` is a list with one character vector per row of df.
s <- strsplit(as.character(df$UniID), ':')
# Only the split values and the matching repeated IDs are passed to data.frame(),
# so the result contains just these two columns (the split values end up in a
# column named `director`); the other columns of df are not carried over.
mydf<-data.frame(director=unlist(s), IDs=rep(df$IDs, lengths(s)))
只给我 IDs 和 UniIDs 的列
# NOTE(review): `.SD`, `tstrsplit()` and `by =` look like data.table syntax, but
# `df` is created with class = "data.frame". The error printed below is raised
# by `[.data.frame`, which has no `by` argument. Presumably `df` must first be
# converted to a data.table (e.g. data.table::setDT(df)) -- confirm.
mydf<- df[, lapply(.SD, function(x) unlist(tstrsplit(x, ":", fixed=TRUE))), by = IDs][!is.na(UniID)]
Error in `[.data.frame`(df, , lapply(.SD, function(x) unlist(tstrsplit(x, :
unused argument (by = IDs)
这个
# NOTE(review): `by = .(...)` looks like data.table syntax applied to a plain
# data.frame; the error printed below is raised by `[.data.frame`, which has no
# `by` argument. The leading "+" on the second line appears to be the R console
# continuation prompt that was copied together with the code.
mydf<- df[, strsplit(as.character(UniID), ":", fixed=TRUE),
+ by = .(IDs, UniID)][,.(UniID = V1, IDs)]
Error in `[.data.frame`(df, , strsplit(as.character(UniID), ":", fixed = TRUE), :
unused argument (by = .(IDs, UniID))
解决方案
一种使用 dplyr 和 tidyr 的可能方案:
# Expand df so that every ":"-separated ID in UniID gets its own row, with all
# other columns repeated for each split value.
library(dplyr)  # provides %>% and mutate()
library(tidyr)  # provides unnest()

df %>%
  # UniID is a factor: convert it to character, then split on the literal ":"
  # so that each row holds a character vector of IDs (a list-column).
  mutate(UniID = strsplit(as.character(UniID), ":", fixed = TRUE)) %>%
  # Name the list-column explicitly; calling unnest() without `cols` is
  # deprecated since tidyr 1.0.0 and emits a warning.
  unnest(cols = UniID)
Division Gene IDs IDs.Links Refseq_IDs Orthology UniID
1 Main data ABI3BP 17265 17265 NP_056244.2:XP_005247340.1 Mouse:Abi3bp| B4DSV9
2 Main data ABI3BP 17265 17265 NP_056244.2:XP_005247340.1 Mouse:Abi3bp| D3YTG3
3 Main data ABI3BP 17265 17265 NP_056244.2:XP_005247340.1 Mouse:Abi3bp| E9PPR9
4 Main data ABI3BP 17265 17265 NP_056244.2:XP_005247340.1 Mouse:Abi3bp| E9PRB5
5 Main data ABI3BP 17265 17265 NP_056244.2:XP_005247340.1 Mouse:Abi3bp| H0Y897
6 Main data ADIPOQ 13633 13633 NP_001171271.1:NP_004788.1 Mouse:Adipoq| Q15848
7 Main data AEBP1 303 303 NP_001120.3 Mouse:Aebp1| C9JLQ8
8 Main data AEBP1 303 303 NP_001120.3 Mouse:Aebp1| H7C0W8
9 Main data AEBP1 303 303 NP_001120.3 Mouse:Aebp1| H7C1J5
10 Second data AGRN 329 329 NP_940978.2:XP_005244806.1:XP_006710696.1 Mouse:Agrn| H0Y5U1
11 Second data AGRN 329 329 NP_940978.2:XP_005244806.1:XP_006710696.1 Mouse:Agrn| O00468
12 Second data AMBN 452 452 NP_057603.1 Mouse:Ambn| Q9NP70
13 Third data AMELX 461 461 NP_001133.1:NP_872621.1:NP_872622.1 Mouse:Amelx| Q99217
在这里,它先按 ":" 拆分“UniID”列,然后将其取消嵌套(unnest),使每个 ID 单独占一行。
推荐阅读
- python - 用python请求替换curl相当于将文件POST到服务器
- azure - Microsoft Azure IOT Hub 和自定义路由
- flutter - 如何在 Flutter 中排列盒子?
- google-colaboratory - 多个 Colab 笔记本可以共享同一个运行时吗?
- php - 使用未定义的常量 S - 假定为“S”
- javascript - 如何为 jQuery 动态文本框的项目编号
- javascript - 是否可以将锚标记的目标设置为 webview 标记?
- kubernetes - 如何使用 Istio API Key 授权服务?
- python - Python PostgreSQL - 导出到 CSV 不导出所有行
- r - 检查数据表列中的值并根据R中的条件返回新表