r - 格式化数据框
问题描述
有没有办法快速格式化我的数据,如下图所示?
数据结构:
期望的输出:
样本数据:
df<-structure(list(serial = c(11011209, 11011209, 11011210, 11011212
), pnum = c(1, 2, 2, 1), Mo_start = c(NA, NA, "t0800_0815_d2",
NA), Mo_end = c(NA, NA, "t1545_1600_d2", NA), Tue_start = c("t0700_0715_d3",
NA, "t0800_0815_d2", NA), Tue_end = c("t1445_1500_d3", NA, "t1545_1600_d2",
NA), Wed_start = c("t0700_0715_d4", "t0700_0715_d4", "t0800_0815_d2",
"t0900_0915_d4"), Wed_end = c("t1445_1500_d4", "t1515_1530_d4",
"t1545_1600_d2", "t1545_1600_d4"), Thur_start = c("t0700_0715_d5",
"t0700_0715_d5", "t0800_0815_d2", NA), Thur_end = c("t1445_1500_d5",
"t1445_1500_d5", "t1545_1600_d2", NA), Fri_start = c(NA, NA,
NA, NA), Fri_end = c(NA, NA, NA, NA)), class = c("spec_tbl_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -4L), spec = structure(list(
cols = list(serial = structure(list(), class = c("collector_double",
"collector")), pnum = structure(list(), class = c("collector_double",
"collector")), Mo_start = structure(list(), class = c("collector_character",
"collector")), Mo_end = structure(list(), class = c("collector_character",
"collector")), Tue_start = structure(list(), class = c("collector_character",
"collector")), Tue_end = structure(list(), class = c("collector_character",
"collector")), Wed_start = structure(list(), class = c("collector_character",
"collector")), Wed_end = structure(list(), class = c("collector_character",
"collector")), Thur_start = structure(list(), class = c("collector_character",
"collector")), Thur_end = structure(list(), class = c("collector_character",
"collector")), Fri_start = structure(list(), class = c("collector_logical",
"collector")), Fri_end = structure(list(), class = c("collector_logical",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), skip = 1L), class = "col_spec"))
解决方案
我们可以pivot_longer
使用names_pattern
library(tidyr)
pivot_longer(df, cols = contains('_'),
names_to = c('weekday', '.value'), names_pattern = '(.*)_(\\w+)$')
-输出
# A tibble: 20 x 5
serial pnum weekday start end
<dbl> <dbl> <chr> <chr> <chr>
1 11011209 1 Mo <NA> <NA>
2 11011209 1 Tue t0700_0715_d3 t1445_1500_d3
3 11011209 1 Wed t0700_0715_d4 t1445_1500_d4
4 11011209 1 Thur t0700_0715_d5 t1445_1500_d5
5 11011209 1 Fri <NA> <NA>
6 11011209 2 Mo <NA> <NA>
7 11011209 2 Tue <NA> <NA>
8 11011209 2 Wed t0700_0715_d4 t1515_1530_d4
9 11011209 2 Thur t0700_0715_d5 t1445_1500_d5
10 11011209 2 Fri <NA> <NA>
11 11011210 2 Mo t0800_0815_d2 t1545_1600_d2
12 11011210 2 Tue t0800_0815_d2 t1545_1600_d2
13 11011210 2 Wed t0800_0815_d2 t1545_1600_d2
14 11011210 2 Thur t0800_0815_d2 t1545_1600_d2
15 11011210 2 Fri <NA> <NA>
16 11011212 1 Mo <NA> <NA>
17 11011212 1 Tue <NA> <NA>
18 11011212 1 Wed t0900_0915_d4 t1545_1600_d4
19 11011212 1 Thur <NA> <NA>
20 11011212 1 Fri <NA> <NA>