首页 > 解决方案 > 在转置 df 时维护行名和列名

问题描述

我正在整理一个数据框:

ga_sessions_combined <- 
ga_sessions_combined %>% dput
structure(list(Metric = structure(1:7, .Label = c("Users", "Engaged Users", 
"Transactions", "Revenue", "ConversionRate", "Bounce Rate", "$/User"
), class = "factor"), ym_201904 = c(157664, 79295, 5764, 609172.887628, 
0.0365587578648265, 0.497063375279075, 3.86374116873858), ym_201905 = c(199340, 
103879, 5744, 673063.435872, 0.0288150897963279, 0.478885321561152, 
3.3764594956958), ym_201906 = c(169971, 90557, 4899, 566247.290325, 
0.0288225638491272, 0.467220878855805, 3.33143471724588), ym_201907 = c(161346, 
88059, 4223, 580408.759911, 0.0261735648854016, 0.454222602357666, 
3.5972925260682), ym_201908 = c(132702, 70701, 3106, 424807.71545, 
0.0234058265888984, 0.467219785685219, 3.20121562184443), ym_201909 = c(164160, 
96124, 3841, 724958.93068, 0.0233979044834308, 0.414449317738791, 
4.41617282334308), ym_201910 = c(217227, 118041, 4448, 798116.2282, 
0.0204762759693777, 0.456600698808159, 3.67411154322437), ym_201911 = c(970864, 
604606, 27713, 4859788.602792, 0.0285446777303515, 0.37724954267539, 
5.00563271765355), ym_201912 = c(1180689, 671162, 59536, 9447240.17602, 
0.0504247943361884, 0.431550560731912, 8.00146370129645), ym_202001 = c(216816, 
109637, 5057, 738079.024166, 0.0233239244336211, 0.494331599143975, 
3.40417231277212), ym_202002 = c(204113, 145975, 4847, 720506.474953, 
0.0237466501398735, 0.284832421256853, 3.52993917561841), ym_202003 = c(324266, 
229438, 8341, 1196234.593648, 0.0257227091338592, 0.292438923599761, 
3.68905341185323)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, 
-7L), .Names = c("Metric", "ym_201904", "ym_201905", "ym_201906", 
"ym_201907", "ym_201908", "ym_201909", "ym_201910", "ym_201911", 
"ym_201912", "ym_202001", "ym_202002", "ym_202003"))

看起来像这样:

ga_sessions_combined %>% select(1:5)
# A tibble: 7 x 5
  Metric           ym_201904   ym_201905   ym_201906   ym_201907
  <fct>                <dbl>       <dbl>       <dbl>       <dbl>
1 Users          157664      199340      169971      161346     
2 Engaged Users   79295      103879       90557       88059     
3 Transactions     5764        5744        4899        4223     
4 Revenue        609173.     673063.     566247.     580409.    
5 ConversionRate      0.0366      0.0288      0.0288      0.0262
6 Bounce Rate         0.497       0.479       0.467       0.454 
7 $/User              3.86        3.38        3.33        3.60  

由于我想使用 dplyr 进行转换,因此我对新创建的列进行了转置和计算。但是我很难将其恢复为这种格式:

这是我的代码块。我很接近,但我怀疑有一种更简单、更直观的方法可以做到这一点。最好避免收集/传播或 pivot_longer/wider 功能,并尽可能坚持转置。

ga_sessions_combined %>% 
  column_to_rownames(var = "Metric") %>% # because when transposing the rownames will become the column names
  t() %>% # transpose
  as.data.frame() %>% # tur back to a df 
  rownames_to_column(var ="ym") %>% # now make what were the column names into rownames

  # do my transfrmations
  mutate_at(vars(Users, `Engaged Users`, Transactions), scales::comma_format()) %>% 
  mutate_at(vars(ConversionRate, `Bounce Rate`), scales::percent_format()) %>% 
  mutate(Revenue = scales::dollar(Revenue)) %>% 

  # try to get back into original layout
  t() %>% 
  as.data.frame() %>% 
  rownames_to_column(var = "Metric") %>% 
  as.data.frame()

看起来像这样:

... %>%  select(1:5)
              Metric        V1        V2        V3        V4
    1             ym ym_201904 ym_201905 ym_201906 ym_201907
    2          Users   157,664   199,340   169,971   161,346
    3  Engaged Users    79,295   103,879    90,557    88,059
    4   Transactions     5,764     5,744     4,899     4,223
    5        Revenue  $609,173  $673,063  $566,247  $580,409
    6 ConversionRate     3.66%     2.88%     2.88%     2.62%
    7    Bounce Rate     49.7%     47.9%     46.7%     45.4%
    8         $/User  3.863741  3.376459  3.331435  3.597293

很近。我想把第 1 行的内容作为列标题。然后,将“ym”替换为“Metric”。

有没有更优雅的方式可以到达我想去的地方?如何转置,进行一些 dplyr 转换,然后转置回原始形状?

标签: rdplyr

解决方案


我们可以row_to_names使用janitor

ga_sessions_combined %>% 
  column_to_rownames(var = "Metric") %>% # because when transposing the rownames will become the column names
  t() %>% # transpose
  as.data.frame() %>% # tur back to a df 
  rownames_to_column(var ="ym") %>% # now make what were the column names into rownames

  # do my transfrmations
  mutate_at(vars(Users, `Engaged Users`, Transactions), scales::comma_format()) %>% 
  mutate_at(vars(ConversionRate, `Bounce Rate`), scales::percent_format()) %>% 
  mutate(Revenue = scales::dollar(Revenue)) %>% 

  # try to get back into original layout
  t() %>% 
  as.data.frame(stringsAsFactors = FALSE) %>% 
  rownames_to_column(var = "Metric") %>%             
  janitor::row_to_names(row_number = 1) %>%
  rename(Metric = ym)
# Metric ym_201904 ym_201905 ym_201906 ym_201907 ym_201908 ym_201909 ym_201910  ym_201911  ym_201912 ym_202001
#2          Users   157,664   199,340   169,971   161,346   132,702   164,160   217,227    970,864  1,180,689   216,816
#3  Engaged Users    79,295   103,879    90,557    88,059    70,701    96,124   118,041    604,606    671,162   109,637
#4   Transactions     5,764     5,744     4,899     4,223     3,106     3,841     4,448     27,713     59,536     5,057
#5        Revenue  $609,173  $673,063  $566,247  $580,409  $424,808  $724,959  $798,116 $4,859,789 $9,447,240  $738,079
#6 ConversionRate  3.65588%  2.88151%  2.88226%  2.61736%  2.34058%  2.33979%  2.04763%   2.85447%   5.04248%  2.33239%
#7    Bounce Rate 49.70634% 47.88853% 46.72209% 45.42226% 46.72198% 41.44493% 45.66007%  37.72495%  43.15506% 49.43316%
#8         $/User  3.863741  3.376459  3.331435  3.597293  3.201216  4.416173  3.674112   5.005633   8.001464  3.404172
#  ym_202002  ym_202003
#2   204,113    324,266
#3   145,975    229,438
#4     4,847      8,341
#5  $720,506 $1,196,235
#6  2.37467%   2.57227%
#7 28.48324%  29.24389%
#8  3.529939   3.689053

推荐阅读