首页 > 解决方案 > 如何使用 pivot_longer 将面板/纵向调查数据从宽格式重塑为长格式

问题描述

我正在尝试使用 R 将一份调查得到的面板/纵向数据集从宽格式重塑为长格式。这个问题与我之前问过的“多变量宽格式转长格式”问题类似,但并不相同。

以下是基本数据格式:

# Toy wide-format survey data: two respondents (id 1-2), three questions
# (Q1-Q3) and three options per question, plus the respondent's Age.
# The sample() calls must stay in this exact order: each one advances the
# RNG stream seeded below, so reordering would change the generated values.
set.seed(1001)
df <- data.frame(
  id = 1:2,
  # Chosen option (1-3) for each question.
  Q1_Choice = sample(1:3, size = 2),
  Q2_Choice = sample(1:3, size = 2),
  Q3_Choice = sample(1:3, size = 2),
  # "Status" attribute of every option; option 1 is always 0.
  Q1_Status_Opt_1 = rep(0, 2),
  Q1_Status_Opt_2 = sample(1:40, size = 2),
  Q1_Status_Opt_3 = sample(1:40, size = 2),
  Q2_Status_Opt_1 = rep(0, 2),
  Q2_Status_Opt_2 = sample(1:40, size = 2),
  Q2_Status_Opt_3 = sample(1:40, size = 2),
  Q3_Status_Opt_1 = rep(0, 2),
  Q3_Status_Opt_2 = sample(1:40, size = 2),
  Q3_Status_Opt_3 = sample(1:40, size = 2),
  # "Time" attribute of every option; option 1 is always 5.
  Q1_Time_Opt_1 = rep(5, 2),
  Q1_Time_Opt_2 = sample(100:200, size = 2),
  Q1_Time_Opt_3 = sample(100:200, size = 2),
  Q2_Time_Opt_1 = rep(5, 2),
  Q2_Time_Opt_2 = sample(1:40, size = 2),
  Q2_Time_Opt_3 = sample(1:40, size = 2),
  Q3_Time_Opt_1 = rep(5, 2),
  Q3_Time_Opt_2 = sample(1:40, size = 2),
  Q3_Time_Opt_3 = sample(100:200, size = 2),
  Age = c(45, 67)
)

# Print the wide data for inspection.
df
#>   id Q1_Choice Q2_Choice Q3_Choice Q1_Status_Opt_1 Q1_Status_Opt_2
#> 1  1         3         3         3               0              11
#> 2  2         1         1         2               0              38
#>   Q1_Status_Opt_3 Q2_Status_Opt_1 Q2_Status_Opt_2 Q2_Status_Opt_3
#> 1              36               0              28              29
#> 2              12               0               4              40
#>   Q3_Status_Opt_1 Q3_Status_Opt_2 Q3_Status_Opt_3 Q1_Time_Opt_1
#> 1               0              31               3             5
#> 2               0              10              28             5
#>   Q1_Time_Opt_2 Q1_Time_Opt_3 Q2_Time_Opt_1 Q2_Time_Opt_2 Q2_Time_Opt_3
#> 1           100           149             5            30            32
#> 2           107           114             5            27             6
#>   Q3_Time_Opt_1 Q3_Time_Opt_2 Q3_Time_Opt_3 Age
#> 1             5             1           189  45
#> 2             5             8           107  67

结果应如下所示:

result                                                       
#>    id question option choice Status Time Age
#> 1   1        1      1      0      0    5  45
#> 2   1        1      2      0     11  100  45
#> 3   1        1      3      1     36  149  45
#> 4   1        2      1      0      0    5  45
#> 5   1        2      2      0     28   30  45
#> 6   1        2      3      1     29   32  45
#> 7   1        3      1      0      0    5  45
#> 8   1        3      2      0     31    1  45
#> 9   1        3      3      1      3  189  45
#> 10  2        1      1      1      0    5  67
#> 11  2        1      2      0     38  107  67
#> 12  2        1      3      0     12  114  67
#> 13  2        2      1      1      0    5  67
#> 14  2        2      2      0      4   27  67
#> 15  2        2      3      0     40    6  67
#> 16  2        3      1      0      0    5  67
#> 17  2        3      2      1     10    8  67
#> 18  2        3      3      0     28  107  67

不知为何,这类较复杂的重塑问题总是让我很吃力,因此非常感谢任何帮助。

标签: r

解决方案


我们可以使用pivot_longer将“宽”格式重塑为“长”格式

library(dplyr)
library(tidyr)
# Step 1: lookup table of the chosen option per respondent and question.
# ".value" turns the text after "_" (here "Choice") into the output column
# name, so the result has columns id, question ("Q1".."Q3") and choice.
out1 <- df %>%
  select(id, ends_with("Choice")) %>%
  pivot_longer(
    cols = -id,
    names_to = c("question", ".value"),
    names_sep = "_"
  ) %>%
  rename(choice = Choice)

# Step 2: reshape the Status/Time columns to one row per id/question/option
# and flag the option that was chosen.
df %>%
  select(-ends_with("Choice")) %>%
  pivot_longer(cols = -c(id, Age)) %>%
  # Names look like "Q1_Status_Opt_1". NA in `into` drops the constant "Opt"
  # piece, so no throwaway column has to be removed and renamed afterwards.
  separate(name, into = c("question", "measure", NA, "option")) %>%
  pivot_wider(names_from = measure, values_from = value) %>%
  # Explicit keys: no "Joining, by = ..." message and no accidental join on
  # another column that happens to share a name.
  left_join(out1, by = c("id", "question")) %>%
  # `option` is character after separate() while `choice` is integer, so
  # convert explicitly instead of relying on implicit coercion. Unary +
  # turns the logical result into 0/1.
  mutate(choice = +(as.integer(option) == choice))
# A tibble: 18 x 7
#      id   Age question option Status  Time choice
#   <int> <dbl> <chr>    <chr>   <dbl> <dbl>  <int>
# 1     1    45 Q1       1           0     5      0
# 2     1    45 Q1       2          11   100      0
# 3     1    45 Q1       3          36   149      1
# 4     1    45 Q2       1           0     5      0
# 5     1    45 Q2       2          28    30      0
# 6     1    45 Q2       3          29    32      1
# 7     1    45 Q3       1           0     5      0
# 8     1    45 Q3       2          31     1      0
# 9     1    45 Q3       3           3   189      1
#10     2    67 Q1       1           0     5      1
#11     2    67 Q1       2          38   107      0
#12     2    67 Q1       3          12   114      0
#13     2    67 Q2       1           0     5      1
#14     2    67 Q2       2           4    27      0
#15     2    67 Q2       3          40     6      0
#16     2    67 Q3       1           0     5      0
#17     2    67 Q3       2          10     8      1
#18     2    67 Q3       3          28   107      0

或者,也可以采用下面的写法(沿用上面得到的 out1):

# Alternative: mark the option number with ":" so that splitting the column
# names on "_" yields exactly three pieces (question, measure, "Opt:<n>").
# Requires `out1` (columns id, question, choice) built in the previous step.
df %>%
  # Base sub() is used instead of str_replace(): stringr is not attached by
  # the library() calls in this script, so the unqualified call fails with
  # "could not find function". rename_with() replaces the superseded
  # rename_at().
  rename_with(
    function(nm) sub("_(\\d+)$", ":\\1", nm),
    .cols = matches("Opt")
  ) %>%
  select(-ends_with("Choice")) %>%
  pivot_longer(
    cols = -c(id, Age),
    names_to = c("question", "measure", "option"),
    names_sep = "_"
  ) %>%
  pivot_wider(names_from = measure, values_from = value) %>%
  # "Opt:1" -> 1
  mutate(option = readr::parse_number(option)) %>%
  # Explicit keys: no "Joining, by = ..." message and no accidental join on
  # another column that happens to share a name.
  left_join(out1, by = c("id", "question")) %>%
  # Unary + turns the logical comparison into a 0/1 indicator.
  mutate(choice = +(choice == option))

推荐阅读