r - 以长格式展开嵌套列
问题描述
我有一个看起来像这样的嵌套数据框:
它的结构如下所示:
> dput( tt )
structure(list(chapter_id = "5e8ccfb7266dd92b6b96a4f7", user_id = "728d4c9f-b35b-11ea-b1f2-ff8dbabf084d",
result = structure(list(RULE1 = 0L, LIMIT1 = 0L, GOAL1 = 0L,
GNDR1 = 0L, EXCH1 = 0L, TIME2 = 0L, ADRS1 = 0L, SUMMARY1 = 0L,
THINK1 = 0L, NVERB1 = 0L, RUDE1 = 0L, AGREE1 = 0L, CLAIM1 = 0L,
INSPIRE1 = 0L, LOGIC1 = 0L, FLOW1 = 0L, TONE1 = 1L, ERROR2 = 0L,
TEAM2 = 0L, SUPP1 = 1L, TEAM1 = 0L, CLAR1 = 0L, CONTEXT1 = 0L,
PLSN1 = 0L, AGMT1 = 0L, RELATION1 = 0L, PM1 = 0L, CARE1 = 0L,
POV1 = 0L, VALID1 = 0L, OTHER1 = 0L, LOGIC3 = 0L, LEAD2 = 0L,
EXMP1 = 0L, COLLAB1 = 0L, COST1 = 0L, LOGIC2 = 0L, INFO1 = 0L,
EMO2 = 0L, EVAL1 = 0L, RSLT1 = 0L, FLEX1 = 0L, EMO1 = 0L,
GATHER1 = 0L, GROUP1 = 0L, ERROR1 = 0L, EXPL1 = 0L, SOLVE1 = 0L,
LEAD1 = 0L, INVOLV1 = 0L, CERT1 = 0L, INTRO1 = 0L, PLTE1 = 0L,
SIGN1 = 0L, HYPO1 = 1L, SENT2 = 1L, SPEND01 = 5L, SPEND04 = 12L,
TIME3 = FALSE, DEL01 = 0L, WORDS01 = 8L, WORDS02 = 37.0942812982998,
INAPR1 = 0L, CLAR2 = 0L, CLAR3 = 0L, DISC1 = 0L, SENT1 = NA_integer_,
CHECK1 = NA_integer_, SPELL01 = NA_integer_, TONE2 = NA_integer_,
`TONE2-POS` = NA_real_, `TONE2-NUT` = NA_real_, `TONE2-NEG` = NA_real_), class = "data.frame", row.names = c(NA,
-1L))), row.names = c(NA, -1L), class = c("tbl_df", "tbl",
"data.frame"))
使用 str() 它看起来像这样:
想要扩展结果数据框并变成四列数据框,列:
userid chapterid result_name result_value
这个怎么做 ?尝试过 unnest() 函数,但它似乎不起作用。
更新
如果我们有不止一排,我们如何使它工作?
> dput( tt2 )
structure(list(chapter_id = c("5e8cd0d8266dd92b6b96a4fb", "5e8ccfb7266dd92b6b96a4f7"
), user_id = c("7e2d7841-b558-11ea-bd3c-4d1a5464dae4", "728d4c9f-b35b-11ea-b1f2-ff8dbabf084d"
), result = structure(list(RULE1 = c(0L, 0L), LIMIT1 = c(0L,
0L), GOAL1 = 1:0, GNDR1 = c(0L, 0L), EXCH1 = c(0L, 0L), TIME2 = c(0L,
0L), ADRS1 = c(0L, 0L), SUMMARY1 = c(0L, 0L), THINK1 = c(0L,
0L), NVERB1 = c(0L, 0L), RUDE1 = c(0L, 0L), AGREE1 = c(0L, 0L
), CLAIM1 = c(0L, 0L), INSPIRE1 = c(0L, 0L), LOGIC1 = c(0L, 0L
), FLOW1 = c(0L, 0L), TONE1 = 0:1, ERROR2 = c(0L, 0L), TEAM2 = 1:0,
SUPP1 = 0:1, TEAM1 = c(4L, 0L), CLAR1 = c(0L, 0L), CONTEXT1 = c(0L,
0L), PLSN1 = c(0L, 0L), AGMT1 = c(0L, 0L), RELATION1 = c(0L,
0L), PM1 = c(0L, 0L), CARE1 = c(0L, 0L), POV1 = 1:0, VALID1 = c(0L,
0L), OTHER1 = c(0L, 0L), LOGIC3 = c(0L, 0L), LEAD2 = c(0L,
0L), EXMP1 = c(0L, 0L), COLLAB1 = c(0L, 0L), COST1 = c(0L,
0L), LOGIC2 = c(0L, 0L), INFO1 = c(0L, 0L), EMO2 = c(3L,
0L), EVAL1 = 1:0, RSLT1 = c(0L, 0L), FLEX1 = c(0L, 0L), EMO1 = c(0L,
0L), GATHER1 = c(0L, 0L), GROUP1 = c(5L, 0L), ERROR1 = c(0L,
0L), EXPL1 = c(0L, 0L), SOLVE1 = 1:0, LEAD1 = c(2L, 0L),
INVOLV1 = c(0L, 0L), CERT1 = c(0L, 0L), INTRO1 = c(0L, 0L
), PLTE1 = c(0L, 0L), SIGN1 = c(0L, 0L), HYPO1 = 0:1, SENT2 = c(10L,
1L), SPEND01 = c(782L, 5L), SPEND04 = c(974L, 12L), TIME3 = c(TRUE,
FALSE), DEL01 = c(394L, 0L), WORDS01 = c(115L, 8L), WORDS02 = c(46.0185741374906,
37.0942812982998), INAPR1 = c(0L, 0L), CLAR2 = c(0L, 0L),
CLAR3 = c(0L, 0L), DISC1 = c(0L, 0L), SENT1 = c(NA_integer_,
NA_integer_), CHECK1 = c(NA_integer_, NA_integer_), SPELL01 = c(NA_integer_,
NA_integer_), TONE2 = c(NA_integer_, NA_integer_), `TONE2-POS` = c(NA_real_,
NA_real_), `TONE2-NUT` = c(NA_real_, NA_real_), `TONE2-NEG` = c(NA_real_,
NA_real_)), class = "data.frame", row.names = c(NA, -2L))), row.names = c(NA,
-2L), class = c("tbl_df", "tbl", "data.frame"))
解决方案
我们可以在using中添加result
数据框的列作为列,然后使用.tt2
do.call
pivot_longer
library(dplyr)
library(tidyr)
tt2 %>%
select(-result) %>%
bind_cols(do.call(cbind.data.frame, tt2$result)) %>%
pivot_longer(cols = -c(chapter_id, user_id),
names_to = 'result_name',values_to = 'result_value')
# A tibble: 146 x 4
# chapter_id user_id result_name result_value
# <chr> <chr> <chr> <dbl>
# 1 5e8cd0d8266dd92b6b96a4fb 7e2d7841-b558-11ea-bd3c-4d1a5464dae4 RULE1 0
# 2 5e8cd0d8266dd92b6b96a4fb 7e2d7841-b558-11ea-bd3c-4d1a5464dae4 LIMIT1 0
# 3 5e8cd0d8266dd92b6b96a4fb 7e2d7841-b558-11ea-bd3c-4d1a5464dae4 GOAL1 1
# 4 5e8cd0d8266dd92b6b96a4fb 7e2d7841-b558-11ea-bd3c-4d1a5464dae4 GNDR1 0
# 5 5e8cd0d8266dd92b6b96a4fb 7e2d7841-b558-11ea-bd3c-4d1a5464dae4 EXCH1 0
# 6 5e8cd0d8266dd92b6b96a4fb 7e2d7841-b558-11ea-bd3c-4d1a5464dae4 TIME2 0
# 7 5e8cd0d8266dd92b6b96a4fb 7e2d7841-b558-11ea-bd3c-4d1a5464dae4 ADRS1 0
# 8 5e8cd0d8266dd92b6b96a4fb 7e2d7841-b558-11ea-bd3c-4d1a5464dae4 SUMMARY1 0
# 9 5e8cd0d8266dd92b6b96a4fb 7e2d7841-b558-11ea-bd3c-4d1a5464dae4 THINK1 0
#10 5e8cd0d8266dd92b6b96a4fb 7e2d7841-b558-11ea-bd3c-4d1a5464dae4 NVERB1 0
# … with 136 more rows
推荐阅读
- algorithm - 如何求时间复杂度和大O?
- python - 如何在 Powershell 中使用 Tree 压缩文件
- flutter - 颤动取消选择 RadioListTile 按钮
- python - 删除目录 - 测试文件繁忙错误
- python-3.x - 如何在python中构建任意深度的动态树?
- snakemake - 如何在snakemake中设置绑定内存限制
- python-3.x - 如何在 boto3 dynamodb 中查询 json 文件?
- java - 将数组向右移动一个插槽
- left-join - 列匹配 Spotfire 枢轴
- raku - 从 Raku 中没有 (Any) 人工制品的列表或数组中删除一个元素