首页 > 解决方案 > 格式化数据框

问题描述

有没有办法快速将我的数据重新整理成如下图所示的格式?

数据结构:

在此处输入图像描述

期望的输出:

在此处输入图像描述

样本数据:

# Sample data for the question: a 4-row table in wide format.
#   - serial, pnum           : numeric identifier columns.
#   - <day>_start, <day>_end : one pair of columns per weekday
#     (Mo, Tue, Wed, Thur, Fri) holding slot codes such as "t0700_0715_d3",
#     or NA when there is no entry. Both Fri columns are entirely NA.
# The object carries the classes "spec_tbl_df"/"tbl_df"/"tbl"/"data.frame" and
# a "spec" attribute (class "col_spec") listing one collector per column.
# NOTE(review): this looks like dput() output of a table read with readr --
# confirm; the "spec" attribute is not needed for the reshaping itself.
df<-structure(list(serial = c(11011209, 11011209, 11011210, 11011212
), pnum = c(1, 2, 2, 1), Mo_start = c(NA, NA, "t0800_0815_d2", 
NA), Mo_end = c(NA, NA, "t1545_1600_d2", NA), Tue_start = c("t0700_0715_d3", 
NA, "t0800_0815_d2", NA), Tue_end = c("t1445_1500_d3", NA, "t1545_1600_d2", 
NA), Wed_start = c("t0700_0715_d4", "t0700_0715_d4", "t0800_0815_d2", 
"t0900_0915_d4"), Wed_end = c("t1445_1500_d4", "t1515_1530_d4", 
"t1545_1600_d2", "t1545_1600_d4"), Thur_start = c("t0700_0715_d5", 
"t0700_0715_d5", "t0800_0815_d2", NA), Thur_end = c("t1445_1500_d5", 
"t1445_1500_d5", "t1545_1600_d2", NA), Fri_start = c(NA, NA, 
NA, NA), Fri_end = c(NA, NA, NA, NA)), class = c("spec_tbl_df", 
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -4L), spec = structure(list(
    cols = list(serial = structure(list(), class = c("collector_double", 
    "collector")), pnum = structure(list(), class = c("collector_double", 
    "collector")), Mo_start = structure(list(), class = c("collector_character", 
    "collector")), Mo_end = structure(list(), class = c("collector_character", 
    "collector")), Tue_start = structure(list(), class = c("collector_character", 
    "collector")), Tue_end = structure(list(), class = c("collector_character", 
    "collector")), Wed_start = structure(list(), class = c("collector_character", 
    "collector")), Wed_end = structure(list(), class = c("collector_character", 
    "collector")), Thur_start = structure(list(), class = c("collector_character", 
    "collector")), Thur_end = structure(list(), class = c("collector_character", 
    "collector")), Fri_start = structure(list(), class = c("collector_logical", 
    "collector")), Fri_end = structure(list(), class = c("collector_logical", 
    "collector"))), default = structure(list(), class = c("collector_guess", 
    "collector")), skip = 1L), class = "col_spec"))

标签: r dataframe

解决方案


我们可以使用 pivot_longer,并配合 names_pattern 参数

# Reshape the wide weekday columns into long format: one row per
# (serial, pnum, weekday), with the start/end slot codes side by side.
library(tidyr)
pivot_longer(
  data = df,
  # Every column whose name contains an underscore (the <day>_start/<day>_end
  # pairs); serial and pnum are left alone as identifier columns.
  cols = contains("_"),
  # The first capture group becomes the `weekday` value; ".value" turns the
  # second capture group (start / end) into the output column names.
  names_to = c("weekday", ".value"),
  names_pattern = "(.*)_(\\w+)$"
)

-输出

# A tibble: 20 x 5
     serial  pnum weekday start         end          
      <dbl> <dbl> <chr>   <chr>         <chr>        
 1 11011209     1 Mo      <NA>          <NA>         
 2 11011209     1 Tue     t0700_0715_d3 t1445_1500_d3
 3 11011209     1 Wed     t0700_0715_d4 t1445_1500_d4
 4 11011209     1 Thur    t0700_0715_d5 t1445_1500_d5
 5 11011209     1 Fri     <NA>          <NA>         
 6 11011209     2 Mo      <NA>          <NA>         
 7 11011209     2 Tue     <NA>          <NA>         
 8 11011209     2 Wed     t0700_0715_d4 t1515_1530_d4
 9 11011209     2 Thur    t0700_0715_d5 t1445_1500_d5
10 11011209     2 Fri     <NA>          <NA>         
11 11011210     2 Mo      t0800_0815_d2 t1545_1600_d2
12 11011210     2 Tue     t0800_0815_d2 t1545_1600_d2
13 11011210     2 Wed     t0800_0815_d2 t1545_1600_d2
14 11011210     2 Thur    t0800_0815_d2 t1545_1600_d2
15 11011210     2 Fri     <NA>          <NA>         
16 11011212     1 Mo      <NA>          <NA>         
17 11011212     1 Tue     <NA>          <NA>         
18 11011212     1 Wed     t0900_0915_d4 t1545_1600_d4
19 11011212     1 Thur    <NA>          <NA>         
20 11011212     1 Fri     <NA>          <NA>         

推荐阅读