首页 > 解决方案 > 按组对变量求和

问题描述

我想减少代码行以对不同的变量进行分组并添加而不从我的基础中删除变量。

这就是我所做的(它有效,但代码有点长)

test1
names(test1)
#[1] "id"                            "DPA_DESPAR"                   
#[3] "DPA_VALOR"                     "DPA_ANIO"                     
#[5] "DPA_CANTON"                    "DPA_DESCAN"                   
#[7] "DPA_PROVIN"                    "DPA_DESPRO"                   
#...
#[19] "exp_omega"                     "total_empl"       
#...         
#[23] "geometry"                     

ss1= test1 %>% 
  group_by(DPA_CANTON) %>% 
  summarise(tot1= sum(total_empl)) 

ss2 = test1 %>% 
  group_by(id) %>% 
  summarise(tot2 = sum(total_empl)) 

test1 = left_join(test1, ss1, by = 'DPA_CANTON')  %>% 
  left_join(., ss2, by = 'id')

test1[1:10,21:22]
#tot1                      tot2
#1            37781         23049
#2            37781          1456
#3            37781          1198
#4            37781          2253
#5            37781           800
#6            37781          1705
#7            37781           615
#8            37781          4663
#9            37781          2042
#10            6593          5022

这不起作用

我用这个

aggregate(test1$total_empl, by=list(Category=test1[c(1,5)]), FUN=sum)
#Error in aggregate.data.frame(as.data.frame(x), ...) : 
#  arguments must have same length

数据

test1 = structure(list(id= c("020150", "020151", 
"020153", "020155", "020156", "020157", "020158", "020159", "020160", 
"020250", "020251", "020350", "020351", "020353", "020354", "020450", 
"020550", "020551", "020552", "020553"), DPA_CANTON = c("0201", 
"0201", "0201", "0201", "0201", "0201", "0201", "0201", "0201", 
"0202", "0202", "0203", "0203", "0203", "0203", "0204", "0205", 
"0205", "0205", "0205"), total_empl= c(23049, 1456, 
1198, 2253, 800, 1705, 615, 4663, 2042, 5022, 1571, 2481, 1077, 
973, 441, 4689, 4884, 875, 1046, 341)), row.names = c(NA, 20L
), class = "data.frame")

标签: rdplyr

解决方案


你可以坚持base

transform(
  test1,
  tot1 = ave(total_empl, DPA_CANTON, FUN = sum),
  tot2 = ave(total_empl, id, FUN = sum)
)

输出:

       id DPA_CANTON total_empl  tot1  tot2
1  020150       0201      23049 37781 23049
2  020151       0201       1456 37781  1456
3  020153       0201       1198 37781  1198
4  020155       0201       2253 37781  2253
5  020156       0201        800 37781   800
6  020157       0201       1705 37781  1705
7  020158       0201        615 37781   615
8  020159       0201       4663 37781  4663
9  020160       0201       2042 37781  2042
10 020250       0202       5022  6593  5022
11 020251       0202       1571  6593  1571
12 020350       0203       2481  4972  2481
13 020351       0203       1077  4972  1077
14 020353       0203        973  4972   973
15 020354       0203        441  4972   441
16 020450       0204       4689  4689  4689
17 020550       0205       4884  7146  4884
18 020551       0205        875  7146   875
19 020552       0205       1046  7146  1046
20 020553       0205        341  7146   341

推荐阅读