r - 从其他多列的值有条件地替换多列的值
问题描述
假设我有这个数据集:
set.seed (1234);
data.frame(cbind(a=rep(c("si","no"),30),b=rnorm(60)),
c=rep(c("d","e","f"),20)) %>% head()
然后我想添加许多列(在这个例子中我只添加了两个),以识别每个组之间的不同情况(在这种情况下,列“a”)。
set.seed(1234);
data.frame(cbind(a=rep(c("si","no"),30),b=rnorm(60)),c=rep(c("d","e","f"),20)) %>%
group_by(a) %>% dplyr::mutate_at(vars(c(b,c)), .funs= list(dups_hash_ing= ~n_distinct(.)))
此代码留下以下数据集:
如果我用 设置数据集dput
,结果是
structure(list(a = structure(c(2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L), .Label = c("no", "si"), class = "factor"), b = structure(c(22L,
1L, 51L, 34L, 50L, 57L, 53L, 10L, 47L, 3L, 11L, 23L, 15L, 38L,
58L, 39L, 41L, 17L, 28L, 21L, 37L, 45L, 29L, 46L, 32L, 48L, 56L,
52L, 26L, 19L, 35L, 8L, 55L, 20L, 9L, 36L, 2L, 12L, 6L, 42L,
49L, 43L, 59L, 54L, 31L, 13L, 60L, 44L, 14L, 30L, 7L, 5L, 16L,
27L, 33L, 18L, 24L, 4L, 25L, 40L), .Label = c("-0.0997905884418961",
"-0.151736536534977", "-0.198416273822079", "-0.254874652654534",
"-0.274704218225806", "-0.304721068966714", "-0.324393300483657",
"-0.400235237343163", "-0.415751788401515", "-0.50873701541522",
"-0.538070788884863", "-0.60615111526422", "-0.659770093821306",
"-0.684320344136007", "-0.789646852263761", "-0.933503340589868",
"-0.965903210133575", "-1.07754212275943", "-1.11444896479736",
"-1.60708093984972", "-2.07823754188738", "-2.7322195229558",
"-2.85575865501923", "-3.23315213292314", "0.0295178303214797",
"0.0326639575014441", "0.116845344986082", "0.162654708118265",
"0.185513915583057", "0.186492083080971", "0.287709728313787",
"0.311681028661359", "0.319160238648117", "0.413868915451097",
"0.418057822385083", "0.42200837321742", "0.485226820569252",
"0.487814635163685", "0.500694614280786", "0.594273774110513",
"0.62021020366732", "0.629536099884472", "0.660212631820405",
"0.677415500438328", "0.696768778564913", "0.700733515544461",
"0.704180178465512", "0.760462361967838", "0.895171980275539",
"0.912322161610113", "0.976031734922396", "1.1123628412626",
"1.16910851401363", "1.17349757263239", "1.49349310261748", "1.84246362620766",
"1.98373220068438", "2.16803253951933", "2.27348352044748", "2.91914013071762"
), class = "factor"), c = structure(c(1L, 2L, 3L, 1L, 2L, 3L,
1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L,
3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L,
1L, 2L, 3L, 1L, 2L, 3L), .Label = c("d", "e", "f"), class = "factor"),
a_dups_hash_ing = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), b_dups_hash_ing = c(30L, 30L, 30L, 30L,
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L,
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L,
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L,
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L,
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L), c_dups_hash_ing = c(3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -60L), groups = structure(list(
a = structure(1:2, .Label = c("no", "si"), class = "factor"),
.rows = list(c(2L, 4L, 6L, 8L, 10L, 12L, 14L, 16L, 18L, 20L,
22L, 24L, 26L, 28L, 30L, 32L, 34L, 36L, 38L, 40L, 42L, 44L,
46L, 48L, 50L, 52L, 54L, 56L, 58L, 60L), c(1L, 3L, 5L, 7L,
9L, 11L, 13L, 15L, 17L, 19L, 21L, 23L, 25L, 27L, 29L, 31L,
33L, 35L, 37L, 39L, 41L, 43L, 45L, 47L, 49L, 51L, 53L, 55L,
57L, 59L))), row.names = c(NA, -2L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE))
我需要做的是,如果不同案例的数量超过每组一个,则逐列替换为原始列的值。我必须为超过 50 列执行此操作。将仅为具有 mutate 的一列提供这样的示例:
dplyr::mutate(b_dups_hash_ing= ifelse(>1,b,0))
我需要为许多变量重复上面提供的代码。这与 a 非常相似mutate_at
(括号中的词是我会做的)。以下示例不起作用,但这是我在理想世界中会做的事情,只是为了让您更好地理解我的问题。
dplyr::mutate_at(vars(contains('_dups_hash_ing')), .funs = list(~ifelse(.>1,vars([original]),0)))
解决方案
这是你要找的吗?
df %>% dplyr::mutate_at(vars(contains('_dups_hash_ing')), ~ ifelse(. > 1, ., 0)) %>% head
#> # A tibble: 6 x 6
#> # Groups: a [2]
#> a b c a_dups_hash_ing b_dups_hash_ing c_dups_hash_ing
#> <fct> <fct> <fct> <dbl> <int> <int>
#> 1 si -2.7322195229558 d 0 30 3
#> 2 no -0.09979058844189… e 0 30 3
#> 3 si 0.976031734922396 f 0 30 3
#> 4 no 0.413868915451097 d 0 30 3
#> 5 si 0.912322161610113 e 0 30 3
#> 6 no 1.98373220068438 f 0 30 3
推荐阅读
- c# - 使用 IoC 容器作为某些服务的服务定位器以获得机会创建子范围
- ruby-on-rails - 您如何将 Rails 应用程序与应用程序引擎上的 postgres 数据库连接起来?
- javascript - 在终端和浏览器中显示 console.log 并做出反应
- python - 将列表列表转换为 CNN 的张量输入
- css - 我有一个创建可扩展导航菜单的 CSS 样式规则错误
- wordpress - 使用 Elementor 编辑 WordPress 标头
- swiftui - SwiftUI 中有没有办法实现两个数字之间的滚动动画
- java - 打印 "XX"KB="YY"MB 和 "ZZ"KB
- html - 将浏览器同步连接到 Windows 10
- reactjs - 如何在提交时关闭 Reactjs-popup 模式?