r - 如何按组计数并在R中的某个值处重新开始计数
问题描述
我想对按 id (PIZ) 分组的行进行简单计数。计数应在每个新的 id (PIZ) 处重新开始,并且在“Wert”列的值超过 500 时也重新开始。
这是数据:
> dput(head(Labor_Neutrophile_alle2, n = 20))
structure(list(PIZ = c("37190985", "37190985", "37190985", "37190985",
"37190985", "37190985", "37190985", "37190985", "37190985", "30195566",
"30195566", "30195566", "30195566", "30195566", "30195566", "30195566",
"30195566", "30195566", "30195566", "30195566"), Datum = c(2019.98630136986,
2019.97808219178, 2019.97534246575, 2019.96712328767, 2019.95616438356,
2019.94794520548, 2019.94246575342, 2019.93698630137, 2019.93150684932,
2019.94520547945, 2019.92876712329, 2019.9095890411, 2019.8904109589,
2019.87123287671, 2019.85205479452, 2019.84931506849, 2019.84383561644,
2019.84109589041, 2019.82191780822, 2019.81369863014), Parameter = c("Neutrophile absolut",
"Neutrophile absolut", "Neutrophile absolut", "Neutrophile absolut",
"Neutrophile absolut", "Neutrophile absolut", "Neutrophile absolut",
"Neutrophile absolut", "Neutrophile absolut", "Neutrophile absolut",
"Neutrophile absolut", "Neutrophile absolut", "Neutrophile absolut",
"Neutrophile absolut", "Neutrophile absolut", "Neutrophile absolut",
"Neutrophile absolut", "Neutrophile absolut", "Neutrophile absolut",
"Neutrophile absolut"), Wert = c(20, 190, 320, 830, 1450, 870,
650, 1390, 1090, 1850, 1800, 1300, 400, 2800, 1360, 1200, 1510,
770, 2300, 2010), Einheit = c("10^3/µL", "10^3/µL", "10^3/µL",
"10^3/µL", "10^3/µL", "10^3/µL", "10^3/µL", "10^3/µL", "10^3/µL",
"10^3/µL", "10^3/µL", "10^3/µL", "10^3/µL", "10^3/µL", "10^3/µL",
"10^3/µL", "10^3/µL", "10^3/µL", "10^3/µL", "10^3/µL"), Neutrophilie = c(0,
0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1)), row.names = c(NA,
-20L), class = c("tbl_df", "tbl", "data.frame"))
我想要的结果是:
> dput(head(Neutro, n = 20))
structure(list(...1 = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 19, 20), PIZ = c(10941890, 10942071,
11712886, 17089951, 17228870, 17250590, 17250590, 17250590, 17250590,
17250590, 17250590, 17250590, 17250590, 17250590, 17250590, 17250590,
17250590, 17250590, 17250590, 17250590), Datum = c(2019.189,
2014.195, 2014.942, 2015.203, 2014.323, 2013.764, 2013.841, 2014.09,
2014.115, 2014.134, 2014.156, 2014.189, 2014.192, 2014.195, 2014.197,
2014.2, 2014.205, 2014.208, 2014.211, 2014.214), Parameter = c("Neutrophile absolut",
"Neutrophile absolut", "Neutrophile absolut", "Neutrophile absolut",
"Neutrophile absolut", "Neutrophile absolut", "Neutrophile absolut",
"Neutrophile absolut", "Neutrophile absolut", "Neutrophile absolut",
"Neutrophile absolut", "Neutrophile absolut", "Neutrophile absolut",
"Neutrophile absolut", "Neutrophile absolut", "Neutrophile absolut",
"Neutrophile absolut", "Neutrophile absolut", "Neutrophile absolut",
"Neutrophile absolut"), Wert = c(4310, 5810, 3700, 3090, 3010,
1620, 1370, 2410, 2120, 710, 60, 20, 90, 120, 230, 220, 190,
180, 470, 1260), Einheit = c("10^3/µL", "10^3/µL", "10^3/µL",
"10^3/µL", "10^3/µL", "10^3/µL", "10^3/µL", "10^3/µL", "10^3/µL",
"10^3/µL", "10^3/µL", "10^3/µL", "10^3/µL", "10^3/µL", "10^3/µL",
"10^3/µL", "10^3/µL", "10^3/µL", "10^3/µL", "10^3/µL"), Neutrophilie = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1), Count = c(1,
1, 1, 1, 1, 1, 2, 3, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1)), row.names = c(NA,
-20L), class = c("tbl_df", "tbl", "data.frame"))
我已经尝试了几种方法,但都没有成功,例如:
# Attempt from the question: group the rows by PIZ and by whether Wert > 500,
# then collapse every group to a single row holding its size.
# summarise() returns one row per group, so this yields per-group totals
# rather than a running count on each original row.
xxx <- Labor_Neutrophile_alle2 %>%
group_by(PIZ, Wert > 500) %>%
summarise(count=n())
解决方案
你可以试试这个 dplyr 方法。先按 PIZ 分组,然后添加一个辅助变量:每当 Wert 大于 500 时就开启一个新的子组。最后只需在每个子组内生成一个整数序列。
请注意,您给出的期望结果所用的数据与输入数据完全不同——这可能是一个错误。
# Running count per PIZ that restarts on every row where Wert > 500.
Labor_Neutrophile_alle2 %>%
  group_by(PIZ) %>%
  # cumsum() of the logical flag grows by one on each row with Wert > 500, so
  # such a row opens a new subgroup; rows with Wert <= 500 stay in the current
  # subgroup.
  mutate(Count1 = cumsum(Wert > 500)) %>%
  group_by(PIZ, Count1) %>%
  # row_number() numbers the rows 1..n inside each subgroup. Unlike seq(n()),
  # it yields an empty vector rather than c(1, 0) when a group has no rows.
  mutate(Count = row_number()) %>%
  ungroup() %>%
  # Drop the helper column; only the final Count is part of the result.
  dplyr::select(-Count1)
#> # A tibble: 20 x 7
#> PIZ Datum Parameter Wert Einheit Neutrophilie Count
#> <chr> <dbl> <chr> <dbl> <chr> <dbl> <int>
#> 1 37190985 2020. Neutrophile absolut 20 10^3/µL 0 1
#> 2 37190985 2020. Neutrophile absolut 190 10^3/µL 0 2
#> 3 37190985 2020. Neutrophile absolut 320 10^3/µL 0 3
#> 4 37190985 2020. Neutrophile absolut 830 10^3/µL 1 1
#> 5 37190985 2020. Neutrophile absolut 1450 10^3/µL 1 1
#> 6 37190985 2020. Neutrophile absolut 870 10^3/µL 1 1
#> 7 37190985 2020. Neutrophile absolut 650 10^3/µL 1 1
#> 8 37190985 2020. Neutrophile absolut 1390 10^3/µL 1 1
#> 9 37190985 2020. Neutrophile absolut 1090 10^3/µL 1 1
#> 10 30195566 2020. Neutrophile absolut 1850 10^3/µL 1 1
#> 11 30195566 2020. Neutrophile absolut 1800 10^3/µL 1 1
#> 12 30195566 2020. Neutrophile absolut 1300 10^3/µL 1 1
#> 13 30195566 2020. Neutrophile absolut 400 10^3/µL 0 2
#> 14 30195566 2020. Neutrophile absolut 2800 10^3/µL 1 1
#> 15 30195566 2020. Neutrophile absolut 1360 10^3/µL 1 1
#> 16 30195566 2020. Neutrophile absolut 1200 10^3/µL 1 1
#> 17 30195566 2020. Neutrophile absolut 1510 10^3/µL 1 1
#> 18 30195566 2020. Neutrophile absolut 770 10^3/µL 1 1
#> 19 30195566 2020. Neutrophile absolut 2300 10^3/µL 1 1
#> 20 30195566 2020. Neutrophile absolut 2010 10^3/µL 1 1
推荐阅读
- snowflake-cloud-data-platform - Snowflake:允许用户更改他们的 RSA_PUBLIC_KEY 属性
- r - R:水平条形图标签
- html - TimeLine -- 单击时更改颜色
- tensorflow.js - 训练数据归一化后如何预测?
- ios - 如何将数据从父视图控制器传递到子容器视图控制器
- jquery - 引导轮播不起作用,在一列中显示图片
- javascript - 将新行动态添加到具有从 csv 文件创建的标题的数组中
- magento2 - 为什么我的 js 脚本不能在 Magento 2 类别产品中运行
- r - 文件中的错误(文件,“rt”):无法打开连接(寻找其他解决方案)
- windows - 使用批处理文件自动连接到 VNC