r - 根据条件在数据集中创建新行
问题描述
我有一个包含三列的数据集。可以看到,Cincinnati 有 10 行,而 Clemson 只有 8 行。
数据
# dput() output of the sample data: an 18-row data.frame with the columns
# player_id (integer), rating (numeric) and school (character);
# 10 rows are "Cincinnati" and 8 rows are "Clemson".
# The literal is not assigned to a name here.
structure(list(player_id = c(473L, 653L, 816L, 885L, 906L, 969L,
998L, 102L, 106L, 107L, 23L, 33L, 44L, 67L, 74L, 80L, 87L, 91L
), rating = c(0.8756, 0.8646, 0.8572, 0.8547, 0.8539, 0.8519,
0.8506, 0.8498, 0.8477, 0.8477, 0.9867, 0.9822, 0.9764, 0.9673,
0.9654, 0.9644, 0.9614, 0.9566), school = c("Cincinnati", "Cincinnati",
"Cincinnati", "Cincinnati", "Cincinnati", "Cincinnati", "Cincinnati",
"Cincinnati", "Cincinnati", "Cincinnati", "Clemson", "Clemson",
"Clemson", "Clemson", "Clemson", "Clemson", "Clemson", "Clemson"
)), class = "data.frame", row.names = c(NA, -18L))
player_id rating school
473 0.8756 Cincinnati
653 0.8646 Cincinnati
816 0.8572 Cincinnati
885 0.8547 Cincinnati
906 0.8539 Cincinnati
969 0.8519 Cincinnati
998 0.8506 Cincinnati
102 0.8498 Cincinnati
106 0.8477 Cincinnati
107 0.8477 Cincinnati
23 0.9867 Clemson
33 0.9822 Clemson
44 0.9764 Clemson
67 0.9673 Clemson
74 0.9654 Clemson
80 0.9644 Clemson
87 0.9614 Clemson
91 0.9566 Clemson
我的目标是:对于不足 10 行的学校,用“占位符”行把它补足到 10 行,占位符行的评分取该学校现有 rating 字段的平均值。以 Clemson 为例,需要补充两行“占位符”,其评分为 0.9701。最终的输出应如下所示:
player_id rating school
473 0.8756 Cincinnati
653 0.8646 Cincinnati
816 0.8572 Cincinnati
885 0.8547 Cincinnati
906 0.8539 Cincinnati
969 0.8519 Cincinnati
998 0.8506 Cincinnati
102 0.8498 Cincinnati
106 0.8477 Cincinnati
107 0.8477 Cincinnati
23 0.9867 Clemson
33 0.9822 Clemson
44 0.9764 Clemson
67 0.9673 Clemson
74 0.9654 Clemson
80 0.9644 Clemson
87 0.9614 Clemson
91 0.9566 Clemson
0 0.9701 Clemson
0 0.9701 Clemson
实现这一目标的最有效方法是什么?
解决方案
我们可以先按 `school` 分组,用 `row_number()` 生成组内行号列,再用 `complete` 基于该行号列扩展数据集,最后用 `replace_na` 把 `rating` 中的 `NA` 元素替换为该组的 `mean`(平均值)。
library(dplyr)
library(tidyr)
# Pad every school to `n` rows. Placeholder rows get player_id = 0 and the
# mean of the school's observed ratings.
n <- 10 # target number of rows per school
df1 %>%
  group_by(school) %>%
  # Number the rows within each school so the missing positions are explicit.
  mutate(rn = row_number()) %>%
  ungroup() %>%
  # Expand to every school x 1..n combination. New rows get player_id = 0 and
  # an NA rating. complete() joins the expansion back to the data, so rows of
  # schools that already have more than n rows are kept.
  complete(school, rn = seq_len(n), fill = list(player_id = 0)) %>%
  # Alternative: pad only up to the size of the largest school in the data
  # complete(school, rn = unique(rn), fill = list(player_id = 0)) %>%
  group_by(school) %>%
  # Fill the placeholder ratings with the per-school mean of the real ratings.
  mutate(rating = replace_na(rating, mean(rating, na.rm = TRUE))) %>%
  # Return an ungrouped tibble so later steps are not silently done by school.
  ungroup()
# A tibble: 20 x 4
#   school        rn player_id rating
#   <chr>      <int>     <dbl>  <dbl>
# 1 Cincinnati     1       473  0.876
# 2 Cincinnati     2       653  0.865
# 3 Cincinnati     3       816  0.857
# 4 Cincinnati     4       885  0.855
# 5 Cincinnati     5       906  0.854
# 6 Cincinnati     6       969  0.852
# 7 Cincinnati     7       998  0.851
# 8 Cincinnati     8       102  0.850
# 9 Cincinnati     9       106  0.848
#10 Cincinnati    10       107  0.848
#11 Clemson        1        23  0.987
#12 Clemson        2        33  0.982
#13 Clemson        3        44  0.976
#14 Clemson        4        67  0.967
#15 Clemson        5        74  0.965
#16 Clemson        6        80  0.964
#17 Clemson        7        87  0.961
#18 Clemson        8        91  0.957
#19 Clemson        9         0  0.970
#20 Clemson       10         0  0.970
使用 OP 的新数据集
# Variant for the OP's updated dataset: pad schools with fewer than `n` rows
# up to `n`, and leave schools that already have `n` or more rows untouched.
n <- 20 # target number of rows per school
out <- df1 %>%
  group_by(school) %>%
  # `n()` is the group size; bare `n` is the target defined above (this
  # assumes the data has no column named `n`, which would take precedence).
  filter(n() < n) %>%
  mutate(rn = row_number()) %>%
  ungroup() %>%
  # NOTE(review): `rank` is presumably a column of the OP's updated dataset;
  # it is not part of the sample data shown in this post -- confirm.
  complete(school, rn = seq_len(n), fill = list(rank = 0)) %>%
  group_by(school) %>%
  # Placeholder ratings take the per-school mean of the observed ratings.
  mutate(rating = replace_na(rating, mean(rating, na.rm = TRUE))) %>%
  ungroup() %>%
  # Add back the schools that already have at least `n` rows. These rows
  # carry no `rn` column, so bind_rows() fills `rn` with NA for them.
  bind_rows(
    df1 %>%
      group_by(school) %>%
      filter(n() >= n) %>%
      ungroup()
  )
# Smallest and largest number of rows per school after padding.
range(table(out$school))
#[1] 20 57
或使用data.table
library(data.table)
library(zoo)
# data.table version of the padding. Uses `n` (target rows per school) and
# `df1` as defined elsewhere in this post.

# Convert df1 to a data.table by reference (df1 itself is modified).
setDT(df1)

# Per school, index rows 1..max(n, .N). Positions past the last real row
# come back as rows whose non-grouping columns are all NA.
padded <- df1[, .SD[seq_len(max(n, .N))], by = school]

# Replace the NA ratings with the mean of the school's observed ratings.
padded[
  ,
  rating := fifelse(is.na(rating), mean(rating, na.rm = TRUE), rating),
  by = school
]

# Carry the last observed value forward in every column. Padded rows thus
# inherit the preceding row's values (e.g. player_id) instead of getting 0.
padded[, lapply(.SD, na.locf0)]
数据
# Sample data for the solutions in this post: 18 players, 10 from Cincinnati
# followed by 8 from Clemson, each with an integer id and a numeric rating.
df1 <- data.frame(
  player_id = c(
    # Cincinnati
    473L, 653L, 816L, 885L, 906L, 969L, 998L, 102L, 106L, 107L,
    # Clemson
    23L, 33L, 44L, 67L, 74L, 80L, 87L, 91L
  ),
  rating = c(
    # Cincinnati
    0.8756, 0.8646, 0.8572, 0.8547, 0.8539,
    0.8519, 0.8506, 0.8498, 0.8477, 0.8477,
    # Clemson
    0.9867, 0.9822, 0.9764, 0.9673,
    0.9654, 0.9644, 0.9614, 0.9566
  ),
  school = rep(c("Cincinnati", "Clemson"), times = c(10L, 8L)),
  # Keep school as character regardless of the R version's default.
  stringsAsFactors = FALSE
)
推荐阅读
- ios - didReceiveNotificationResponse method is not called when the app gets launched through notification swipe from a closed state
- laravel - livewire v2 上的 Laravel 8 Mutators
- java - 如何检查用户是否输入了数字以外的任何内容?
- mongodb - 在 MongoDB 聚合中用 $cond 替换 $ifNull
- kubernetes - io.k8s.api.apps.v1.DeploymentSpec 中的未知字段“资源”
- ios - iOS 无法使用 FileManager 访问应用组文件夹
- ios - 从已经呈现的 UIViewController 呈现 UIImagePickerController
- c++ - C++ 不同运行的不同输出
- javascript - 如何从静态js文件Node.js + Vue.js + Webpack执行linux命令?
- nuxt.js - Nuxt 相当于 Vue Route Guards 'Next'