r - 使用 dplyr 计算某变量两种取值各自所占的百分比
问题描述
在一项作业中,我想查看变量 CIU 为 0 与 CIU 为 1 的受试者数量。下面是数据的 dput() 输出(后文代码中称为 jail):
# Example data for the question, as produced by dput(): 20 rows, one per
# subject. Bound to `jail` so the dplyr snippet that follows can be run as-is.
# Note that Age, Sentence, State_Award, Amount, Amount__1 and State_Award2 are
# stored as character vectors, and the class attribute marks the object as a
# tibble ("tbl_df").
jail <- structure(list(Last_Name = c("Banks", "Beamon", "Dandridge",
"Deakle, Jr.", "Doyle", "Drinkard", "Ellis", "Embry", "Gaines",
"Gurley", "Hinton", "Holemon", "Holsomback", "Hunt", "Jones",
"Mahan", "Mahan", "McMillian", "Moore", "Padgett"), First_Name = c("Medell",
"Melvin Todd", "Beniah Alton", "Evan Lee", "Robert E.", "Gary",
"Andre", "Anthony", "Freddie Lee", "Timothy", "Anthony", "Jeffrey",
"John", "H. Guy", "Lydia Diane", "Dale", "Ronnie", "Walter",
"Daniel Wade", "Larry Randal"), Age = c("27", "24", "29", "59",
"44", "37", "35", "23", "22", "22", "29", "23", "33", "54", "40",
"22", "26", "45", "24", "40"), Race = c("Black", "Black", "Caucasian",
"Caucasian", "Caucasian", "Caucasian", "Black", "Black", "Black",
"Caucasian", "Black", "Caucasian", "Caucasian", "Caucasian",
"Black", "Caucasian", "Caucasian", "Black", "Caucasian", "Caucasian"
), Sex = c("Male", "Male", "Male", "Male", "Male", "Male", "Male",
"Male", "Male", "Male", "Male", "Male", "Male", "Male", "Female",
"Male", "Male", "Male", "Male", "Male"), State = c("Alabama",
"Alabama", "Alabama", "Alabama", "Alabama", "Alabama", "Alabama",
"Alabama", "Alabama", "Alabama", "Alabama", "Alabama", "Alabama",
"Alabama", "Alabama", "Alabama", "Alabama", "Alabama", "Alabama",
"Alabama"), CIU = c(0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0,
0, 0, 0, 0, 1, 0), Guilty_Plea = c(1, 0, 0, 0, 0, 0, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), IO = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), Worst_Crime = c(6, 1,
1, 4, 4, 1, 2, 1, 1, 6, 1, 2, 4, 6, 3, 2, 2, 1, 1, 1), Occurred = c(1999,
1988, 1994, 2014, 1991, 1993, 2012, 1992, 1972, 1999, 1985, 1987,
1987, 1987, 1997, 1983, 1983, 1986, 1999, 1990), Convicted = c(2001,
1989, 1996, 2015, 1992, 1995, 2013, 1993, 1974, 2000, 1986, 1988,
1988, 1993, 2000, 1986, 1986, 1988, 2002, 1992), Exonerated = c(2003,
1990, 2015, 2015, 2001, 2001, 2014, 1997, 1991, 2002, 2015, 1999,
2000, 1998, 2006, 1998, 1998, 1993, 2009, 1997), Sentence = c("15",
"25", "Life", "Not sentenced", "20", "Death", "85", "20", "30",
"35", "Death", "Life", "25", "Probation", "Life without parole",
"35", "Life without parole", "Death", "Death", "Death"), Death_Penalty = c(0,
0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1), DNA_Only = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0), FC = c(1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), MWID = c(0,
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0), F_MFE = c(0,
0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1), P_FA = c(1,
1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0), OM = c(1,
1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1), ILD = c(0,
0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0), State_Statute = c("Y",
"Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y",
"Y", "Y", "Y", "Y", "Y", "Y"), State_Claim_Made = c(0, 0, 1,
0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0), Zero_time = c(0,
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0), Prem = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), Pending = c(0,
0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0), Denied = c(0,
0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), State_Award = c("0",
"0", "2", "0", "1", "0", "0", "0", "1", "0", "2", "0", "0", "0",
"0", "0", "0", "0", "0", "0"), Amount = c("0", "0", NA, "0",
"129041.88", "0", "0", "0", "1000000", "0", NA, "0", "0", "0",
"0", "0", "0", "0", "0", "0"), `Non-Statutory_Case_Filed` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0), No_Time = c(0,
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0), Unfiled = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1), Dismissed = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0), Pending__1 = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), Award = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0), Premature = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), Amount__1 = c("0",
"0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0",
"0", "0", "0", "$ undisclosed", "0", "0"), Years_Lost = c(1.7,
0.1, 19.5, 0, 2.6, 5.7, 1.8, 4, 10.7, 1.5, 28.5, 10.6, 10.1,
0, 5.8, 11.4, 11.4, 4.5, 5.4, 5.5), State_Award2 = c("0", "0",
"0", "0", "1", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0",
"0", "0", "0", "0", "0")), row.names = c(NA, -20L), class = c("tbl_df",
"tbl", "data.frame"))
使用 dplyr 包,我目前做到了这一步:
# Tally the rows of `jail` for each distinct CIU value; the result has one row
# per CIU value, with the tally stored in a column named `count`.
CUI <- dplyr::count(jail, CIU, name = "count")
现在我想创建一个表格,显示每个 CIU 组在“State_Claim_Made”各类别中所占的百分比,但我不确定接下来该怎么做。最终,我希望看到 CIU=0 的受试者中 State_Claim_Made=0 与 State_Claim_Made=1 各自的百分比,CIU=1 的受试者也同样如此;也就是一个 2×2 的表格。我更希望继续使用 dplyr 包,但这不是必须的。
解决方案
你的示例数据并不能真正让我们看到全貌,所以不妨假设有如下数据:
# Synthetic 50-row example: 20 rows with CIU = 0 followed by 30 with CIU = 1,
# and 15 rows with State_Claim_Made = 1 followed by 35 with
# State_Claim_Made = 0. Both columns are integer vectors.
df <- data.frame(
  CIU = c(rep(0L, 20), rep(1L, 30)),
  State_Claim_Made = c(rep(1L, 15), rep(0L, 35))
)
然后
# Cross-tabulate CIU against State_Claim_Made once and reuse the result for
# both the counts and the row-wise shares.
tab <- table(CIU = df$CIU, State_Claim_Made = df$State_Claim_Made)

# Counts for each CIU / State_Claim_Made combination.
tab
#    State_Claim_Made
# CIU  0  1
#   0  5 15
#   1 30  0

# Within each CIU group (row), the share of each State_Claim_Made value, so
# every row sums to 1. prop.table() with margin = 1 divides by the row totals
# explicitly instead of relying on how a length-2 vector is recycled over the
# table. Multiply by 100 if percentages are wanted.
prop.table(tab, margin = 1)
#    State_Claim_Made
# CIU    0    1
#   0 0.25 0.75
#   1 1.00 0.00
推荐阅读
- vb.net - SAPI SPEAK 如何做一个后台工作人员
- python - 如何在 scipy.optimize.minimize 中传递一组输入参数?
- xamarin - xamarin ios中的相机和图库
- javascript - 如何使用 findOneAndUpdate 在数组中添加信息而不删除先前包含的信息
- css - Z-index 在 Safari 中无法正确呈现 - 适用于所有其他浏览器
- r - 为什么 min/max/sum(c(NA, 4, 5), na.rm = "xyz") 工作,而具有相同输入的 mean() 不工作?
- python - 从查询集中删除一列
- php - 如何正确使用 PHPMailer
- c# - 无法将 json 数据从控制器发送到 ajax
- odbc - 我可以从基于 Web 的应用程序连接到 ODBC 数据库吗?