首页 > 解决方案 > 如何按组计数并在R中的某个值处重新开始计数

问题描述

我想对按 id (PIZ) 分组的行进行简单计数。计数应在每个新 id (PIZ) 处重新开始,如果“Wert”列的值超过 500。

这是数据:

> dput(head(Labor_Neutrophile_alle2, n = 20))
structure(list(PIZ = c("37190985", "37190985", "37190985", "37190985", 
"37190985", "37190985", "37190985", "37190985", "37190985", "30195566", 
"30195566", "30195566", "30195566", "30195566", "30195566", "30195566", 
"30195566", "30195566", "30195566", "30195566"), Datum = c(2019.98630136986, 
2019.97808219178, 2019.97534246575, 2019.96712328767, 2019.95616438356, 
2019.94794520548, 2019.94246575342, 2019.93698630137, 2019.93150684932, 
2019.94520547945, 2019.92876712329, 2019.9095890411, 2019.8904109589, 
2019.87123287671, 2019.85205479452, 2019.84931506849, 2019.84383561644, 
2019.84109589041, 2019.82191780822, 2019.81369863014), Parameter = c("Neutrophile absolut", 
"Neutrophile absolut", "Neutrophile absolut", "Neutrophile absolut", 
"Neutrophile absolut", "Neutrophile absolut", "Neutrophile absolut", 
"Neutrophile absolut", "Neutrophile absolut", "Neutrophile absolut", 
"Neutrophile absolut", "Neutrophile absolut", "Neutrophile absolut", 
"Neutrophile absolut", "Neutrophile absolut", "Neutrophile absolut", 
"Neutrophile absolut", "Neutrophile absolut", "Neutrophile absolut", 
"Neutrophile absolut"), Wert = c(20, 190, 320, 830, 1450, 870, 
650, 1390, 1090, 1850, 1800, 1300, 400, 2800, 1360, 1200, 1510, 
770, 2300, 2010), Einheit = c("10^3/µL", "10^3/µL", "10^3/µL", 
"10^3/µL", "10^3/µL", "10^3/µL", "10^3/µL", "10^3/µL", "10^3/µL", 
"10^3/µL", "10^3/µL", "10^3/µL", "10^3/µL", "10^3/µL", "10^3/µL", 
"10^3/µL", "10^3/µL", "10^3/µL", "10^3/µL", "10^3/µL"), Neutrophilie = c(0, 
0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1)), row.names = c(NA, 
-20L), class = c("tbl_df", "tbl", "data.frame"))

我想要的结果是:

> dput(head(Neutro, n = 20))
structure(list(...1 = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 
13, 14, 15, 16, 17, 18, 19, 20), PIZ = c(10941890, 10942071, 
11712886, 17089951, 17228870, 17250590, 17250590, 17250590, 17250590, 
17250590, 17250590, 17250590, 17250590, 17250590, 17250590, 17250590, 
17250590, 17250590, 17250590, 17250590), Datum = c(2019.189, 
2014.195, 2014.942, 2015.203, 2014.323, 2013.764, 2013.841, 2014.09, 
2014.115, 2014.134, 2014.156, 2014.189, 2014.192, 2014.195, 2014.197, 
2014.2, 2014.205, 2014.208, 2014.211, 2014.214), Parameter = c("Neutrophile absolut", 
"Neutrophile absolut", "Neutrophile absolut", "Neutrophile absolut", 
"Neutrophile absolut", "Neutrophile absolut", "Neutrophile absolut", 
"Neutrophile absolut", "Neutrophile absolut", "Neutrophile absolut", 
"Neutrophile absolut", "Neutrophile absolut", "Neutrophile absolut", 
"Neutrophile absolut", "Neutrophile absolut", "Neutrophile absolut", 
"Neutrophile absolut", "Neutrophile absolut", "Neutrophile absolut", 
"Neutrophile absolut"), Wert = c(4310, 5810, 3700, 3090, 3010, 
1620, 1370, 2410, 2120, 710, 60, 20, 90, 120, 230, 220, 190, 
180, 470, 1260), Einheit = c("10^3/µL", "10^3/µL", "10^3/µL", 
"10^3/µL", "10^3/µL", "10^3/µL", "10^3/µL", "10^3/µL", "10^3/µL", 
"10^3/µL", "10^3/µL", "10^3/µL", "10^3/µL", "10^3/µL", "10^3/µL", 
"10^3/µL", "10^3/µL", "10^3/µL", "10^3/µL", "10^3/µL"), Neutrophilie = c(1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1), Count = c(1, 
1, 1, 1, 1, 1, 2, 3, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1)), row.names = c(NA, 
-20L), class = c("tbl_df", "tbl", "data.frame"))

我已经尝试了几件事,但是,你......:

xxx <- Labor_Neutrophile_alle2 %>% 
   group_by(PIZ, Wert > 500) %>% 
   summarise(count=n())

标签: r

解决方案


你可以试试这个dplyr方法。您按 分组PIZ,然后添加一个虚拟变量以根据是否Wert大于 500 拆分为新组。然后您只需在每个子组中生成一个整数序列。

请注意,您想要的结果与输入数据的数据完全不同——这可能是一个错误。

Labor_Neutrophile_alle2 %>%
  group_by(PIZ) %>%
  mutate(Count1 = cumsum(Wert > 500)) %>%
  group_by(PIZ, Count1) %>%
  mutate(Count = seq(n())) %>% 
  ungroup() %>% 
  dplyr::select(-c(Count1))
#> # A tibble: 20 x 7
#>    PIZ      Datum Parameter            Wert Einheit Neutrophilie Count
#>    <chr>    <dbl> <chr>               <dbl> <chr>          <dbl> <int>
#>  1 37190985 2020. Neutrophile absolut    20 10^3/µL            0     1
#>  2 37190985 2020. Neutrophile absolut   190 10^3/µL            0     2
#>  3 37190985 2020. Neutrophile absolut   320 10^3/µL            0     3
#>  4 37190985 2020. Neutrophile absolut   830 10^3/µL            1     1
#>  5 37190985 2020. Neutrophile absolut  1450 10^3/µL            1     1
#>  6 37190985 2020. Neutrophile absolut   870 10^3/µL            1     1
#>  7 37190985 2020. Neutrophile absolut   650 10^3/µL            1     1
#>  8 37190985 2020. Neutrophile absolut  1390 10^3/µL            1     1
#>  9 37190985 2020. Neutrophile absolut  1090 10^3/µL            1     1
#> 10 30195566 2020. Neutrophile absolut  1850 10^3/µL            1     1
#> 11 30195566 2020. Neutrophile absolut  1800 10^3/µL            1     1
#> 12 30195566 2020. Neutrophile absolut  1300 10^3/µL            1     1
#> 13 30195566 2020. Neutrophile absolut   400 10^3/µL            0     2
#> 14 30195566 2020. Neutrophile absolut  2800 10^3/µL            1     1
#> 15 30195566 2020. Neutrophile absolut  1360 10^3/µL            1     1
#> 16 30195566 2020. Neutrophile absolut  1200 10^3/µL            1     1
#> 17 30195566 2020. Neutrophile absolut  1510 10^3/µL            1     1
#> 18 30195566 2020. Neutrophile absolut   770 10^3/µL            1     1
#> 19 30195566 2020. Neutrophile absolut  2300 10^3/µL            1     1
#> 20 30195566 2020. Neutrophile absolut  2010 10^3/µL            1     1

推荐阅读