首页 > 解决方案 > 以长格式展开嵌套列

问题描述

我有一个看起来像这样的嵌套数据框:

在此处输入图像描述

它的结构如下所示:

> dput( tt )
structure(list(chapter_id = "5e8ccfb7266dd92b6b96a4f7", user_id = "728d4c9f-b35b-11ea-b1f2-ff8dbabf084d", 
    result = structure(list(RULE1 = 0L, LIMIT1 = 0L, GOAL1 = 0L, 
        GNDR1 = 0L, EXCH1 = 0L, TIME2 = 0L, ADRS1 = 0L, SUMMARY1 = 0L, 
        THINK1 = 0L, NVERB1 = 0L, RUDE1 = 0L, AGREE1 = 0L, CLAIM1 = 0L, 
        INSPIRE1 = 0L, LOGIC1 = 0L, FLOW1 = 0L, TONE1 = 1L, ERROR2 = 0L, 
        TEAM2 = 0L, SUPP1 = 1L, TEAM1 = 0L, CLAR1 = 0L, CONTEXT1 = 0L, 
        PLSN1 = 0L, AGMT1 = 0L, RELATION1 = 0L, PM1 = 0L, CARE1 = 0L, 
        POV1 = 0L, VALID1 = 0L, OTHER1 = 0L, LOGIC3 = 0L, LEAD2 = 0L, 
        EXMP1 = 0L, COLLAB1 = 0L, COST1 = 0L, LOGIC2 = 0L, INFO1 = 0L, 
        EMO2 = 0L, EVAL1 = 0L, RSLT1 = 0L, FLEX1 = 0L, EMO1 = 0L, 
        GATHER1 = 0L, GROUP1 = 0L, ERROR1 = 0L, EXPL1 = 0L, SOLVE1 = 0L, 
        LEAD1 = 0L, INVOLV1 = 0L, CERT1 = 0L, INTRO1 = 0L, PLTE1 = 0L, 
        SIGN1 = 0L, HYPO1 = 1L, SENT2 = 1L, SPEND01 = 5L, SPEND04 = 12L, 
        TIME3 = FALSE, DEL01 = 0L, WORDS01 = 8L, WORDS02 = 37.0942812982998, 
        INAPR1 = 0L, CLAR2 = 0L, CLAR3 = 0L, DISC1 = 0L, SENT1 = NA_integer_, 
        CHECK1 = NA_integer_, SPELL01 = NA_integer_, TONE2 = NA_integer_, 
        `TONE2-POS` = NA_real_, `TONE2-NUT` = NA_real_, `TONE2-NEG` = NA_real_), class = "data.frame", row.names = c(NA, 
    -1L))), row.names = c(NA, -1L), class = c("tbl_df", "tbl", 
"data.frame"))

使用 str() 它看起来像这样:

在此处输入图像描述

想要扩展结果数据框并变成四列数据框,列:

userid  chapterid result_name result_value

这个怎么做 ?尝试过 unnest() 函数,但它似乎不起作用。

更新

如果我们有不止一排,我们如何使它工作?

> dput( tt2 )
structure(list(chapter_id = c("5e8cd0d8266dd92b6b96a4fb", "5e8ccfb7266dd92b6b96a4f7"
), user_id = c("7e2d7841-b558-11ea-bd3c-4d1a5464dae4", "728d4c9f-b35b-11ea-b1f2-ff8dbabf084d"
), result = structure(list(RULE1 = c(0L, 0L), LIMIT1 = c(0L, 
0L), GOAL1 = 1:0, GNDR1 = c(0L, 0L), EXCH1 = c(0L, 0L), TIME2 = c(0L, 
0L), ADRS1 = c(0L, 0L), SUMMARY1 = c(0L, 0L), THINK1 = c(0L, 
0L), NVERB1 = c(0L, 0L), RUDE1 = c(0L, 0L), AGREE1 = c(0L, 0L
), CLAIM1 = c(0L, 0L), INSPIRE1 = c(0L, 0L), LOGIC1 = c(0L, 0L
), FLOW1 = c(0L, 0L), TONE1 = 0:1, ERROR2 = c(0L, 0L), TEAM2 = 1:0, 
    SUPP1 = 0:1, TEAM1 = c(4L, 0L), CLAR1 = c(0L, 0L), CONTEXT1 = c(0L, 
    0L), PLSN1 = c(0L, 0L), AGMT1 = c(0L, 0L), RELATION1 = c(0L, 
    0L), PM1 = c(0L, 0L), CARE1 = c(0L, 0L), POV1 = 1:0, VALID1 = c(0L, 
    0L), OTHER1 = c(0L, 0L), LOGIC3 = c(0L, 0L), LEAD2 = c(0L, 
    0L), EXMP1 = c(0L, 0L), COLLAB1 = c(0L, 0L), COST1 = c(0L, 
    0L), LOGIC2 = c(0L, 0L), INFO1 = c(0L, 0L), EMO2 = c(3L, 
    0L), EVAL1 = 1:0, RSLT1 = c(0L, 0L), FLEX1 = c(0L, 0L), EMO1 = c(0L, 
    0L), GATHER1 = c(0L, 0L), GROUP1 = c(5L, 0L), ERROR1 = c(0L, 
    0L), EXPL1 = c(0L, 0L), SOLVE1 = 1:0, LEAD1 = c(2L, 0L), 
    INVOLV1 = c(0L, 0L), CERT1 = c(0L, 0L), INTRO1 = c(0L, 0L
    ), PLTE1 = c(0L, 0L), SIGN1 = c(0L, 0L), HYPO1 = 0:1, SENT2 = c(10L, 
    1L), SPEND01 = c(782L, 5L), SPEND04 = c(974L, 12L), TIME3 = c(TRUE, 
    FALSE), DEL01 = c(394L, 0L), WORDS01 = c(115L, 8L), WORDS02 = c(46.0185741374906, 
    37.0942812982998), INAPR1 = c(0L, 0L), CLAR2 = c(0L, 0L), 
    CLAR3 = c(0L, 0L), DISC1 = c(0L, 0L), SENT1 = c(NA_integer_, 
    NA_integer_), CHECK1 = c(NA_integer_, NA_integer_), SPELL01 = c(NA_integer_, 
    NA_integer_), TONE2 = c(NA_integer_, NA_integer_), `TONE2-POS` = c(NA_real_, 
    NA_real_), `TONE2-NUT` = c(NA_real_, NA_real_), `TONE2-NEG` = c(NA_real_, 
    NA_real_)), class = "data.frame", row.names = c(NA, -2L))), row.names = c(NA, 
-2L), class = c("tbl_df", "tbl", "data.frame"))

标签: r

解决方案


我们可以在using中添加result数据框的列作为列,然后使用.tt2do.callpivot_longer

library(dplyr)
library(tidyr)

tt2 %>%
  select(-result) %>%
  bind_cols(do.call(cbind.data.frame, tt2$result)) %>%
  pivot_longer(cols = -c(chapter_id, user_id), 
               names_to = 'result_name',values_to = 'result_value')

# A tibble: 146 x 4
#   chapter_id               user_id                              result_name result_value
#   <chr>                    <chr>                                <chr>              <dbl>
# 1 5e8cd0d8266dd92b6b96a4fb 7e2d7841-b558-11ea-bd3c-4d1a5464dae4 RULE1                  0
# 2 5e8cd0d8266dd92b6b96a4fb 7e2d7841-b558-11ea-bd3c-4d1a5464dae4 LIMIT1                 0
# 3 5e8cd0d8266dd92b6b96a4fb 7e2d7841-b558-11ea-bd3c-4d1a5464dae4 GOAL1                  1
# 4 5e8cd0d8266dd92b6b96a4fb 7e2d7841-b558-11ea-bd3c-4d1a5464dae4 GNDR1                  0
# 5 5e8cd0d8266dd92b6b96a4fb 7e2d7841-b558-11ea-bd3c-4d1a5464dae4 EXCH1                  0
# 6 5e8cd0d8266dd92b6b96a4fb 7e2d7841-b558-11ea-bd3c-4d1a5464dae4 TIME2                  0
# 7 5e8cd0d8266dd92b6b96a4fb 7e2d7841-b558-11ea-bd3c-4d1a5464dae4 ADRS1                  0
# 8 5e8cd0d8266dd92b6b96a4fb 7e2d7841-b558-11ea-bd3c-4d1a5464dae4 SUMMARY1               0
# 9 5e8cd0d8266dd92b6b96a4fb 7e2d7841-b558-11ea-bd3c-4d1a5464dae4 THINK1                 0
#10 5e8cd0d8266dd92b6b96a4fb 7e2d7841-b558-11ea-bd3c-4d1a5464dae4 NVERB1                 0
# … with 136 more rows

推荐阅读