首页 > 解决方案 > 我想同时pivot_longer 多列

问题描述

我有 3 列的集合,我想将它们转换为 3 个单独的列,第一列将集合的第一列的名称作为值,另外两列从其他 2 列中获取值。 这是草图帮助你理解。非常感激

标签: rdplyrpivot

解决方案


让我们从一些可重现的数据开始:

library(tidyr)
set.seed(123)
df <- tibble(
  COLS = rep(paste0(rep(LETTERS,each = 10),"_", rep(1:10, 26)), each = 100),
  INDX = rep(1:100, 260),
  VAL = rnorm(26000)
)

df
#> # A tibble: 26,000 x 3
#>    COLS   INDX     VAL
#>    <chr> <int>   <dbl>
#>  1 A_1       1 -0.560 
#>  2 A_1       2 -0.230 
#>  3 A_1       3  1.56  
#>  4 A_1       4  0.0705
#>  5 A_1       5  0.129 
#>  6 A_1       6  1.72  
#>  7 A_1       7  0.461 
#>  8 A_1       8 -1.27  
#>  9 A_1       9 -0.687 
#> 10 A_1      10 -0.446 
#> # … with 25,990 more rows

starting_df <- pivot_wider(
  df, id_cols = INDX,
  names_from = COLS, values_from = VAL
)

starting_df
#> # A tibble: 100 x 261
#>     INDX     A_1     A_2     A_3    A_4     A_5     A_6     A_7    A_8     A_9
#>    <int>   <dbl>   <dbl>   <dbl>  <dbl>   <dbl>   <dbl>   <dbl>  <dbl>   <dbl>
#>  1     1 -0.560  -0.710   2.20   -0.715 -0.0736 -0.602   1.07   -0.728  0.356 
#>  2     2 -0.230   0.257   1.31   -0.753 -1.17   -0.994  -0.0273 -1.54  -0.658 
#>  3     3  1.56   -0.247  -0.265  -0.939 -0.635   1.03   -0.0333 -0.693  0.855 
#>  4     4  0.0705 -0.348   0.543  -1.05  -0.0288  0.751  -1.52    0.119  1.15  
#>  5     5  0.129  -0.952  -0.414  -0.437  0.671  -1.51    0.790  -1.36   0.276 
#>  6     6  1.72   -0.0450 -0.476   0.331 -1.65   -0.0951 -0.211   0.590  0.144 
#>  7     7  0.461  -0.785  -0.789  -2.01  -0.350  -0.896  -0.657   0.289 -0.0756
#>  8     8 -1.27   -1.67   -0.595   0.212  0.756  -2.07   -1.41   -0.904  2.16  
#>  9     9 -0.687  -0.380   1.65    1.24  -0.539   0.150  -0.300   0.226  0.276 
#> 10    10 -0.446   0.919  -0.0540  2.04   0.227  -0.0792 -0.849   0.748 -0.158 
#> # … with 90 more rows, and 251 more variables: A_10 <dbl>, B_1 <dbl>,
#> #   B_2 <dbl>, B_3 <dbl>, B_4 <dbl>, B_5 <dbl>, B_6 <dbl>, B_7 <dbl>,
#> #   B_8 <dbl>, B_9 <dbl>, B_10 <dbl>, C_1 <dbl>, C_2 <dbl>, C_3 <dbl>,
#> #   C_4 <dbl>, C_5 <dbl>, C_6 <dbl>, C_7 <dbl>, C_8 <dbl>, C_9 <dbl>,
#> #   C_10 <dbl>, D_1 <dbl>, D_2 <dbl>, D_3 <dbl>, D_4 <dbl>, D_5 <dbl>,
#> #   D_6 <dbl>, D_7 <dbl>, D_8 <dbl>, D_9 <dbl>, D_10 <dbl>, E_1 <dbl>,
#> #   E_2 <dbl>, E_3 <dbl>, E_4 <dbl>, E_5 <dbl>, E_6 <dbl>, E_7 <dbl>,
#> #   E_8 <dbl>, E_9 <dbl>, E_10 <dbl>, F_1 <dbl>, F_2 <dbl>, F_3 <dbl>,
#> #   F_4 <dbl>, F_5 <dbl>, F_6 <dbl>, F_7 <dbl>, F_8 <dbl>, F_9 <dbl>,
#> #   F_10 <dbl>, G_1 <dbl>, G_2 <dbl>, G_3 <dbl>, G_4 <dbl>, G_5 <dbl>,
#> #   G_6 <dbl>, G_7 <dbl>, G_8 <dbl>, G_9 <dbl>, G_10 <dbl>, H_1 <dbl>,
#> #   H_2 <dbl>, H_3 <dbl>, H_4 <dbl>, H_5 <dbl>, H_6 <dbl>, H_7 <dbl>,
#> #   H_8 <dbl>, H_9 <dbl>, H_10 <dbl>, I_1 <dbl>, I_2 <dbl>, I_3 <dbl>,
#> #   I_4 <dbl>, I_5 <dbl>, I_6 <dbl>, I_7 <dbl>, I_8 <dbl>, I_9 <dbl>,
#> #   I_10 <dbl>, J_1 <dbl>, J_2 <dbl>, J_3 <dbl>, J_4 <dbl>, J_5 <dbl>,
#> #   J_6 <dbl>, J_7 <dbl>, J_8 <dbl>, J_9 <dbl>, J_10 <dbl>, K_1 <dbl>,
#> #   K_2 <dbl>, K_3 <dbl>, K_4 <dbl>, K_5 <dbl>, K_6 <dbl>, K_7 <dbl>,
#> #   K_8 <dbl>, K_9 <dbl>, …
pivot_longer(
  starting_df,
  #INDX is only helpful for generating the data
  #In your data you can just use `everything()`
  cols = -INDX, 
  #specify how the columns will be named
  #if you use `names_pattern`, each regrex group will
  #will be captured into column.
  #".value" is a special placeholder for potentially 
  #multiple columns
  names_to = c("LETTER",".value"),
  #regrex pattern. Each group is specified with parenthesis
  #"(.)" first group and captures 1 character. maps to "LETTER", 
  #"_" just used for matching 
  #"(.*)" second group, captures multiple characters. maps to ".value" 
  #  and as such a new column is made for each match here.
  names_pattern = c("(.)_(.*)")
)
#> # A tibble: 2,600 x 12
#>     INDX LETTER    `1`    `2`    `3`     `4`     `5`    `6`    `7`    `8`    `9`
#>    <int> <chr>   <dbl>  <dbl>  <dbl>   <dbl>   <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
#>  1     1 A      -0.560 -0.710  2.20  -0.715  -0.0736 -0.602  1.07  -0.728  0.356
#>  2     1 B      -0.996  0.916  0.620 -0.750  -1.09   -0.821 -0.289 -0.193 -1.29 
#>  3     1 C      -0.512 -0.200  2.28   0.208  -0.483  -0.679  1.62  -2.01   0.317
#>  4     1 D      -0.150  1.09  -0.842 -0.304  -0.369   1.48  -0.214 -0.933  0.702
#>  5     1 E       0.197  1.07   0.651  1.43   -0.0329  0.834  0.917  1.75  -0.633
#>  6     1 F      -0.494  0.254  1.07  -0.108  -0.604  -1.36  -1.56  -0.732  1.00 
#>  7     1 G      -0.699 -0.875  1.03  -0.0275 -1.36    0.584 -0.816  1.08   0.190
#>  8     1 H      -1.62   0.341  0.928 -0.162   0.545   0.350  0.885 -1.18   1.16 
#>  9     1 I       0.511 -1.02  -0.495 -0.109   0.524   0.273 -0.414 -0.743 -0.219
#> 10     1 J       1.93   0.103 -0.204 -0.123   0.202  -0.338 -1.18   0.450  2.11 
#> # … with 2,590 more rows, and 1 more variable: 10 <dbl>

reprex 包于 2021-05-11 创建(v1.0.0)


推荐阅读