首页 > 解决方案 > R dplyr 如何根据单元格值修剪数据集?

问题描述

使用 dplyr,我想通过“Rep Number”等于 15 的行找出对应的“set id”,并保留具有相同“set id”的所有 rep(重复次数)行。我的目标是最终针对各个“set id”计算一些汇总统计量,但需要先剔除多余的数据。

编辑:使用 dplyr,我想用“Rep Number”列中的值 15 来找出“set id”列中的对应值。同一个“set id”也出现在它前面的 14 行中,我想保留这些行的全部信息。另外还有一些“set id”(第 16-24 行)没有对应的“Rep Number”15。这里只是数据的一个子集,便于给出示例,完整数据集中还有更多不需要的数据行。

我了解识别这些值的基本代码,例如 x <- rpdc$`set id`[rpdc$`Rep Number` == 15] 和 y <- which(rpdc$`Rep Number` == 15),但不知道如何利用它们来缩减我的数据框。

我的样本数据如下

 dput(rpdc)
structure(list(exercise = c("Pullups - Chin", "Pullups - Chin", 
"Pullups - Chin", "Pullups - Chin", "Pullups - Chin", "Pullups - Chin", 
"Pullups - Chin", "Pullups - Chin", "Pullups - Chin", "Pullups - Chin", 
"Pullups - Chin", "Pullups - Chin", "Pullups - Chin", "Pullups - Chin", 
"Pullups - Chin", "Pullups - Chin", "Pullups - Chin", "Pullups - Chin", 
"Pullups - Chin", "Pullups - Chin", "Pullups - Chin", "Pullups - Chin", 
"Pullups - Chin", "Pullups - Chin", "Bench Press", "Bench Press", 
"Bench Press", "Bench Press", "Bench Press", "Bench Press", "Bench Press", 
"Bench Press", "Bench Press", "Bench Press", "Bench Press", "Bench Press", 
"Bench Press", "Bench Press", "Bench Press", "Bench Press", "Bench Press", 
"Bench Press", "Bench Press", "Bench Press", "Bench Press", "Bench Press", 
"Bench Press", "Bench Press", "Bench Press", "Bench Press", "Bench Press", 
"Bench Press", "Bench Press", "Bench Press"), `set id` = c(-2688, 
-2688, -2688, -2688, -2688, -2688, -2688, -2688, -2688, -2688, 
-2688, -2688, -2688, -2688, -2688, -2686, -2686, -2686, -2684, 
-2684, -2684, -2683, -2683, -2683, -2682, -2682, -2682, -2682, 
-2682, -2682, -2682, -2682, -2682, -2682, -2682, -2682, -2682, 
-2682, -2682, -2674, -2674, -2674, -2674, -2674, -2674, -2674, 
-2674, -2674, -2674, -2674, -2674, -2674, -2674, -2674), `bar weight (KG)` = c(10, 
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 35, 35, 
35, 30, 30, 30, 20, 20, 20, 65, 65, 65, 65, 65, 65, 65, 65, 65, 
65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 
65, 65, 65, 65, 65), `Rep Number` = c(1, 2, 3, 4, 5, 6, 7, 8, 
9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 
4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3, 4, 5, 6, 7, 
8, 9, 10, 11, 12, 13, 14, 15), `Vertical Distance(")` = c(27.768, 
31.434, 29.654, 29.089, 30.183, 30.252, 28.825, 29.636, 28.161, 
27.808, 29.839, 30.077, 30.536, 32.136, 26.933, 26.72, 27.755, 
29.33, 27.563, 28.342, 27.366, 28.071, 28.11, 29.083, 19.411, 
20.489, 19.597, 19.438, 19.157, 19.733, 18.953, 18.883, 19.348, 
18.256, 18.482, 18.894, 18.321, 19.432, 19.087, 19.337, 19.593, 
19.319, 18.635, 19.164, 18.985, 18.469, 19.514, 18.52, 18.78, 
18.666, 18.948, 19.201, 19.864, 19.248), `Concentric Time(s)` = c(0.827, 
0.934, 0.867, 0.868, 0.908, 0.911, 0.95, 0.948, 0.951, 0.908, 
1.056, 0.972, 1.245, 1.339, 0.806, 0.969, 1.03, 1.171, 0.909, 
0.91, 0.989, 0.847, 0.805, 0.826, 0.563, 0.585, 0.585, 0.626, 
0.606, 0.686, 0.646, 0.625, 0.685, 0.726, 0.726, 0.807, 0.828, 
0.849, 0.868, 0.543, 0.565, 0.563, 0.565, 0.585, 0.585, 0.646, 
0.624, 0.646, 0.666, 0.665, 0.706, 0.727, 0.768, 0.788), `Mean Force(N)` = c(934.493, 
929.081, 930.822, 929.073, 932.432, 926.381, 932.145, 928.64, 
923.847, 933.126, 926.17, 941.675, 931.5, 928.348, 975.2, 1180.886, 
1181.97, 1178.66, 1129.274, 1127.994, 1125.706, 1036.935, 1040.416, 
1033.517, 663.722, 676.834, 655.584, 656.418, 672.06, 653.77, 
656.407, 662.05, 657.197, 656.23, 662.913, 653.854, 648.483, 
655.211, 651.538, 658.221, 666.012, 676.698, 683.59, 662.983, 
674.509, 664.292, 665.332, 667.18, 659.45, 659.756, 653.508, 
665.657, 652.489, 648.576), `Mean Power(W)` = c(775.174, 782.296, 
790.43, 773.774, 768.733, 769.255, 703.302, 723.227, 686.334, 
708.397, 658.426, 767.898, 576.253, 560.252, 803.432, 808.339, 
789.61, 735.055, 850.053, 872.67, 776.319, 847.193, 890.844, 
897.497, 564.985, 594.033, 554.204, 507.104, 515.873, 468.272, 
475.517, 493.19, 456.333, 401.521, 410.119, 374.523, 353.282, 
367.442, 352.025, 580.428, 577.282, 569.644, 546.341, 534.244, 
532.895, 459.955, 506.744, 462.765, 453.095, 448.575, 428.095, 
426.065, 413.17, 389.76), `Mean Velocity(m/s)` = c(0.836, 0.845, 
0.853, 0.835, 0.83, 0.831, 0.759, 0.781, 0.741, 0.765, 0.711, 
0.828, 0.622, 0.605, 0.837, 0.689, 0.674, 0.627, 0.757, 0.778, 
0.692, 0.826, 0.868, 0.876, 0.851, 0.87, 0.829, 0.771, 0.787, 
0.715, 0.729, 0.75, 0.702, 0.625, 0.635, 0.584, 0.552, 0.572, 
0.549, 0.877, 0.859, 0.85, 0.819, 0.812, 0.807, 0.711, 0.777, 
0.713, 0.701, 0.697, 0.667, 0.659, 0.645, 0.609)), row.names = c(NA, 
-54L), groups = structure(list(exercise = c("Bench Press", "Bench Press", 
"Pullups - Chin", "Pullups - Chin", "Pullups - Chin", "Pullups - Chin"
), `set id` = c(-2682, -2674, -2688, -2686, -2684, -2683), .rows = structure(list(
    25:39, 40:54, 1:15, 16:18, 19:21, 22:24), ptype = integer(0), class = c("vctrs_list_of", 
"vctrs_vctr", "list"))), row.names = c(NA, 6L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"))

标签: r, select, filter, dplyr

解决方案


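如果只需要保留每个 'exercise' 的前 15 行,可以先按 'exercise' 重新分组(OP 的数据原本按 'exercise' 和 'set id' 分组),再筛选组内行号不超过 15 的行。注意:这种做法只按行的位置筛选,不检查 'Rep Number';在示例数据中,Bench Press 有两个包含第 15 次重复的 'set id'(-2682 和 -2674),这样只会保留前一个。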

library(dplyr)
# Keep at most the first 15 rows of each exercise and store them in `rpdc2`.
#
# `rpdc` arrives grouped by `exercise` and `set id`; group_by() replaces that
# grouping, so row_number() counts rows within each exercise rather than
# within each set.
#
# NOTE(review): the filter is purely positional and never inspects
# `Rep Number`. To keep every set that contains a rep 15 instead, group by
# `set id` and use filter(any(`Rep Number` == 15)).
#
# The result stays grouped by `exercise`; call ungroup() if that is unwanted.
rpdc2 <- rpdc %>%
  group_by(exercise) %>%
  filter(row_number() <= 15)

推荐阅读