r - 我想同时pivot_longer 多列
问题描述
我有 3 列的集合,我想将它们转换为 3 个单独的列,第一列将集合的第一列的名称作为值,另外两列从其他 2 列中获取值。 帮助你理解。非常感激
解决方案
让我们从一些可重现的数据开始:
library(tidyr)
set.seed(123)
df <- tibble(
COLS = rep(paste0(rep(LETTERS,each = 10),"_", rep(1:10, 26)), each = 100),
INDX = rep(1:100, 260),
VAL = rnorm(26000)
)
df
#> # A tibble: 26,000 x 3
#> COLS INDX VAL
#> <chr> <int> <dbl>
#> 1 A_1 1 -0.560
#> 2 A_1 2 -0.230
#> 3 A_1 3 1.56
#> 4 A_1 4 0.0705
#> 5 A_1 5 0.129
#> 6 A_1 6 1.72
#> 7 A_1 7 0.461
#> 8 A_1 8 -1.27
#> 9 A_1 9 -0.687
#> 10 A_1 10 -0.446
#> # … with 25,990 more rows
starting_df <- pivot_wider(
df, id_cols = INDX,
names_from = COLS, values_from = VAL
)
starting_df
#> # A tibble: 100 x 261
#> INDX A_1 A_2 A_3 A_4 A_5 A_6 A_7 A_8 A_9
#> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1 -0.560 -0.710 2.20 -0.715 -0.0736 -0.602 1.07 -0.728 0.356
#> 2 2 -0.230 0.257 1.31 -0.753 -1.17 -0.994 -0.0273 -1.54 -0.658
#> 3 3 1.56 -0.247 -0.265 -0.939 -0.635 1.03 -0.0333 -0.693 0.855
#> 4 4 0.0705 -0.348 0.543 -1.05 -0.0288 0.751 -1.52 0.119 1.15
#> 5 5 0.129 -0.952 -0.414 -0.437 0.671 -1.51 0.790 -1.36 0.276
#> 6 6 1.72 -0.0450 -0.476 0.331 -1.65 -0.0951 -0.211 0.590 0.144
#> 7 7 0.461 -0.785 -0.789 -2.01 -0.350 -0.896 -0.657 0.289 -0.0756
#> 8 8 -1.27 -1.67 -0.595 0.212 0.756 -2.07 -1.41 -0.904 2.16
#> 9 9 -0.687 -0.380 1.65 1.24 -0.539 0.150 -0.300 0.226 0.276
#> 10 10 -0.446 0.919 -0.0540 2.04 0.227 -0.0792 -0.849 0.748 -0.158
#> # … with 90 more rows, and 251 more variables: A_10 <dbl>, B_1 <dbl>,
#> # B_2 <dbl>, B_3 <dbl>, B_4 <dbl>, B_5 <dbl>, B_6 <dbl>, B_7 <dbl>,
#> # B_8 <dbl>, B_9 <dbl>, B_10 <dbl>, C_1 <dbl>, C_2 <dbl>, C_3 <dbl>,
#> # C_4 <dbl>, C_5 <dbl>, C_6 <dbl>, C_7 <dbl>, C_8 <dbl>, C_9 <dbl>,
#> # C_10 <dbl>, D_1 <dbl>, D_2 <dbl>, D_3 <dbl>, D_4 <dbl>, D_5 <dbl>,
#> # D_6 <dbl>, D_7 <dbl>, D_8 <dbl>, D_9 <dbl>, D_10 <dbl>, E_1 <dbl>,
#> # E_2 <dbl>, E_3 <dbl>, E_4 <dbl>, E_5 <dbl>, E_6 <dbl>, E_7 <dbl>,
#> # E_8 <dbl>, E_9 <dbl>, E_10 <dbl>, F_1 <dbl>, F_2 <dbl>, F_3 <dbl>,
#> # F_4 <dbl>, F_5 <dbl>, F_6 <dbl>, F_7 <dbl>, F_8 <dbl>, F_9 <dbl>,
#> # F_10 <dbl>, G_1 <dbl>, G_2 <dbl>, G_3 <dbl>, G_4 <dbl>, G_5 <dbl>,
#> # G_6 <dbl>, G_7 <dbl>, G_8 <dbl>, G_9 <dbl>, G_10 <dbl>, H_1 <dbl>,
#> # H_2 <dbl>, H_3 <dbl>, H_4 <dbl>, H_5 <dbl>, H_6 <dbl>, H_7 <dbl>,
#> # H_8 <dbl>, H_9 <dbl>, H_10 <dbl>, I_1 <dbl>, I_2 <dbl>, I_3 <dbl>,
#> # I_4 <dbl>, I_5 <dbl>, I_6 <dbl>, I_7 <dbl>, I_8 <dbl>, I_9 <dbl>,
#> # I_10 <dbl>, J_1 <dbl>, J_2 <dbl>, J_3 <dbl>, J_4 <dbl>, J_5 <dbl>,
#> # J_6 <dbl>, J_7 <dbl>, J_8 <dbl>, J_9 <dbl>, J_10 <dbl>, K_1 <dbl>,
#> # K_2 <dbl>, K_3 <dbl>, K_4 <dbl>, K_5 <dbl>, K_6 <dbl>, K_7 <dbl>,
#> # K_8 <dbl>, K_9 <dbl>, …
pivot_longer(
starting_df,
#INDX is only helpful for generating the data
#In your data you can just use `everything()`
cols = -INDX,
#specify how the columns will be named
#if you use `names_pattern`, each regrex group will
#will be captured into column.
#".value" is a special placeholder for potentially
#multiple columns
names_to = c("LETTER",".value"),
#regrex pattern. Each group is specified with parenthesis
#"(.)" first group and captures 1 character. maps to "LETTER",
#"_" just used for matching
#"(.*)" second group, captures multiple characters. maps to ".value"
# and as such a new column is made for each match here.
names_pattern = c("(.)_(.*)")
)
#> # A tibble: 2,600 x 12
#> INDX LETTER `1` `2` `3` `4` `5` `6` `7` `8` `9`
#> <int> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1 A -0.560 -0.710 2.20 -0.715 -0.0736 -0.602 1.07 -0.728 0.356
#> 2 1 B -0.996 0.916 0.620 -0.750 -1.09 -0.821 -0.289 -0.193 -1.29
#> 3 1 C -0.512 -0.200 2.28 0.208 -0.483 -0.679 1.62 -2.01 0.317
#> 4 1 D -0.150 1.09 -0.842 -0.304 -0.369 1.48 -0.214 -0.933 0.702
#> 5 1 E 0.197 1.07 0.651 1.43 -0.0329 0.834 0.917 1.75 -0.633
#> 6 1 F -0.494 0.254 1.07 -0.108 -0.604 -1.36 -1.56 -0.732 1.00
#> 7 1 G -0.699 -0.875 1.03 -0.0275 -1.36 0.584 -0.816 1.08 0.190
#> 8 1 H -1.62 0.341 0.928 -0.162 0.545 0.350 0.885 -1.18 1.16
#> 9 1 I 0.511 -1.02 -0.495 -0.109 0.524 0.273 -0.414 -0.743 -0.219
#> 10 1 J 1.93 0.103 -0.204 -0.123 0.202 -0.338 -1.18 0.450 2.11
#> # … with 2,590 more rows, and 1 more variable: 10 <dbl>
由reprex 包于 2021-05-11 创建(v1.0.0)
推荐阅读
- azure - 在应用服务计划上托管 Azure Functions 时的内存注意事项
- objective-c - 如何启用后台线程绘制到 NSView
- kotlin - 具有可能的空值的 Kotlin 映射器
- c++ - 间接运算符如何通过运算符重载返回指针地址
- ocaml - OCaml 对二次根的偏应用
- android - 不能使用提供的参数调用以下函数: Kotlin
- python - 使用 python 在没有 protobuf 的情况下实时解析 GTFS
- python - 将加速度计数据存储在树莓派 SD 卡中
- .htaccess - 仅重定向带有子页面的父页面
- mysql - 将不同的列分组为一行