r - 按条件对数据框的每一行进行计数
问题描述
我有一个包含 97 个变量的数据框。对于变量 32 到 97,我需要计算每行中出现的值 1、2、3 和 4 的实例数(仅适用于从 32 到 97 的变量)。
下面是数据框的结构:
# dput() output for a two-row sample of the data frame.
structure(list(subject_label = c("01", "01"),
event_code = c("b", "16"), site_id = c(5, 5), site_label = c("a",
"a"), event_label = c("Baseline",
NA), done = c(1L, NA), ndreason = c(NA_integer_, NA_integer_
), rater = c("OLD", NA), carerid = c(1L, NA), ntgph = c(4L,
NA), nt1ph = c(2L, NA), ntgmh = c(2L, NA), ntgse = c(0L,
NA), ntgxv = c(1L, NA), ntgxb = c(0L, NA), ntgxg = c(0L,
NA), ntgxh = c(0L, NA), ntgxd = c(0L, NA), ntgxa = c(0L,
NA), ntgxm = c(0L, NA), ntgxw = c(0L, NA), ntgxp = c(0L,
NA), ntgcd = c(0L, NA), ntgch = c(0L, NA), ntgcs = c(0L,
NA), ntgnr = c(0L, NA), ntgii = c(0L, NA), ntsem = c(0L,
NA), ntsei = c(0L, NA), ntsev = c(0L, NA), ntsey = c(NA_integer_,
NA_integer_), ntseo = c("Interpersonal conflict with co-workers",
NA), ntawb = c(0L, NA), ntahd = c(0L, NA), ntaid = c(0L,
NA), ntaui = c(0L, NA), ntahe = c(0L, NA), ntaub = c(0L,
NA), ntadi = c(1L, NA), ntatb = c(NA_integer_, NA_integer_
), ntcic = c(0L, NA), ntcfw = c(0L, NA), ntcfi = c(0L, NA
), ntclc = c(0L, NA), ntcnr = c(0L, NA), ntcnw = c(0L, NA
), ntses = c(0L, NA), ntsia = c(0L, NA), ntswf = c(1L, NA
), ntscn = c(0L, NA), ntssd = c(0L, NA), ntswn = c(0L, NA
), ntswe = c(0L, NA), ntssl = c(0L, NA), ntacw = c(0L, NA
), ntalb = c(0L, NA), ntacs = c(NA_integer_, NA_integer_),
ntafa = c(0L, NA), ntaaw = c(0L, NA), ntmrp = c(0L, NA),
ntmdr = c(0L, NA), ntmre = c(0L, NA), ntmfw = c(0L, NA),
ntmlt = c(4L, NA), ntmmo = c(0L, NA), ntmwp = c(0L, NA),
ntmpp = c(0L, NA), ntmpl = c(0L, NA), ntbaw = c(0L, NA),
ntbws = c(1L, NA), ntbwp = c(4L, NA), ntbla = c(0L, NA),
ntbow = c(0L, NA), ntbor = c(4L, NA), ntbhh = c(4L, NA),
ntbfo = c(0L, NA), ntbii = c(4L, NA), ntblc = c(0L, NA),
ntbaa = c(0L, NA), ntbad = c(0L, NA), ntbva = c(0L, NA),
ntbpa = c(0L, NA), ntbtt = c(0L, NA), ntbll = c(0L, NA),
ntbts = c(1L, NA), ntrca = c(0L, NA), ntrht = c(0L, NA),
ntrst = c(0L, NA), ntrct = c(0L, NA), ntrci = c(1L, NA),
ntrcm = c(0L, NA), ntoog = c(0L, NA), ntoop = c(0L, NA),
ntoof = c(0L, NA), ntooa = c(0L, NA), ntoow = c(0L, NA),
ntoov = c(0L, NA), ntgsp = c("a",
NA), update_stamp = c("a", NA)), row.names = c(NA, -2L), class = "data.frame")
我想做的是:
# length(which(...)) collapses the whole 32:97 selection into ONE number (the
# count of cells equal to 1 over all rows); that scalar is then recycled into
# every row of the new column, so this is not a per-row count.
NTG$symptoms_morethanyear <- length(which(NTG[,c(32:97)] == 1))
然而,这似乎把这些字段中的所有内容加总,并把整个总和放进最后一列(而不是对每一行分别计数)。
解决方案
如果您不介意修改第 37 到 97 列,下面是一个解决方案:
library(tidyverse)

# `data_frame` is the data frame from the question (named `NTG` there).
# Columns 37:97 are overwritten with an indicator: 1 when the value is one of
# 1, 2, 3, 4 and 0 otherwise (%in% never returns NA, so NA cells become 0).
# Summing the indicators across each row gives the per-row count.
data_frame %>%
  mutate(across(37:97, ~ if_else(.x %in% 1:4, 1, 0))) %>%
  mutate(conts = rowSums(select(., 37:97))) %>%
  # `conts` is column 100; selecting past the last column is an error.
  select(97:100)
ntoov ntgsp update_stamp conts
1 0 a a 10
2 0 <NA> <NA> 0
如果您不想修改第 32 到 97 列,也可以这样做:
library(data.table)

# `data_frame` is the data frame from the question (named `NTG` there).
# %between% is inclusive on both ends, so every cell of columns 32:97 holding a
# value from 1 to 4 is flagged; rowSums() then counts the flagged cells in each
# row. na.rm = TRUE makes NA cells contribute 0. Columns 32:97 stay unchanged.
# NOTE(review): column 32 (ntseo) is character in the sample data, so it is
# compared as text here - confirm it should be part of the range.
data_frame$counts <- rowSums(
  data_frame[, 32:97] %between% c(1, 4),
  na.rm = TRUE
)

# Show the last columns together with the new `counts` column (column 100).
data_frame[, 90:100]
ntrci ntrcm ntoog ntoop ntoof ntooa ntoow ntoov ntgsp update_stamp counts
1 1 0 0 0 0 0 0 0 a a 10
2 NA NA NA NA NA NA NA NA <NA> <NA> 0
推荐阅读
- bigcommerce - BigCommerce - 在 StencilJS 主题的订单确认页面上更改感谢文本
- javascript - 如何在Vuelayer中根据GeoJSON绘制所有国家边界?
- javascript - React JS 中的 JSON 流式传输
- html - 如何以角度连接 UL > LI 列表项(无 jquery)
- javascript - 清除 Cookie 和 localStorage 无法完全正常工作
- dialogflow-es - 如何在 Web 控制台之外使用 Dialogflow
- elixir - 为什么 ecto 无法使用“Group by”预加载查询?
- java - 使用 Spring AOP 清理记录器
- asp.net - 托管 ASP.Net MVC 站点
- node.js - Instagram 服务器端身份验证 2019