首页 > 解决方案 > 无法使用描述性函数运行 dplyr::summarize 函数:“如果 'na.rm' 为 FALSE,则不允许缺少值和 NaN”

问题描述

我正在尝试使用 dplyr 的 group by/summarize_all 运行一些描述性变量,但找不到传递“na.rm = TRUE”的方法。

该代码基于我在这里找到的东西来创建按变量分组的描述性统计:

df_grouped <-  df %>%  group_by (country, election) %>% 
  summarise_all(list(min=min, Q1=~quantile(., probs = 0.25),
                     median=median, Q3=~quantile(., probs = 0.75),
                     max=max), na.rm = TRUE))

错误:

Problem with `summarise()` input `cong_LH_all_Q1`.
x missing values and NaN's not allowed if 'na.rm' is FALSE
i Input `cong_LH_all_Q1` is `(structure(function (..., .x = ..1, .y = ..2, . = ..1) ...`.
i The error occurred in group 2: country = "Argentina", election = "ARG_2015".
Run `rlang::last_error()` to see where the error occurred.

这里似乎有一个解决方案,但我找不到一种方法来适应我的情况。

我使用的数据集示例:

df<- structure(list(country = c("Norway", "Denmark", "Australia", 
"Mexico", "New Zealand", "Hong Kong", "Great Britain", "Taiwan", 
"Republic of Korea", "Hungary", "Peru", "Russian Federation", 
"Netherlands", "Sweden", "Austria", "Ireland", "France", "Japan", 
"Great Britain", "Canada", "Australia", "Denmark", "Australia", 
"Switzerland", "Brazil", "Serbia", "Switzerland", "Brazil", "Mexico", 
"Ireland", "Finland", "South Africa", "Peru", "Australia", "Romania", 
"Kenya", "Denmark", "New Zealand", "Philippines", "United States of America", 
"Spain", "Norway", "Ukraine", "Hungary", "Hungary", "Canada", 
"Japan", "Turkey", "Latvia", "Taiwan", "Ireland", "Chile", "New Zealand", 
"Philippines", "Czech Republic", "Japan", "Iceland", "Netherlands", 
"Netherlands", "Republic of Korea", "Hong Kong", "Peru", "Romania", 
"New Zealand", "Peru", "Uruguay", "United States of America", 
"Canada", "Albania", "Mexico", "Sweden", "Great Britain", "Spain", 
"Slovenia", "Switzerland", "Brazil", "Turkey", "Bulgaria", "Czech Republic", 
"Finland", "Norway", "Ireland", "Canada", "United States of America", 
"Iceland", "Argentina", "Switzerland", "Ireland", "Norway", "Uruguay", 
"United States of America", "New Zealand", "Russian Federation", 
"Israel", "Australia", "New Zealand", "Finland", "Canada", "Canada", 
"New Zealand", "Taiwan", "Mexico", "Hong Kong", "New Zealand", 
"Republic of Korea", "Japan", "Canada", "Hungary", "Mexico", 
"Czech Republic", "Bulgaria", "Portugal", "Australia", "Slovenia", 
"Sweden", "Belgium", "Norway", "Peru", "Ireland", "Iceland", 
"Argentina", "Republic of Korea", "Greece", "Thailand", "Czech Republic", 
"Great Britain", "Brazil", "Romania", "Canada", "Republic of Korea"
), election = c("NOR_1997", "DNK_2001", "AUS_2004", "MEX_2000", 
"NZL_2014", "HKG_1998", "GBR_2015", "TWN_1996", "KOR_2000", "HUN_1998", 
"PER_2016", "RUS_1999", "NLD_2010", "SWE_2014", "AUT_2008", "IRL_2002", 
"FRA_2007", "JPN_2004", "GBR_1997", "CAN_2015", "AUS_2007", "DNK_2001", 
"AUS_2007", "CHE_2011", "BRA_2014", "SRB_2012", "CHE_1999", "BRA_2002", 
"MEX_2009", "IRL_2011", "FIN_2015", "ZAF_2014", "PER_2016", "AUS_2004", 
"ROU_2004", "KEN_2013", "DNK_1998", "NZL_1996", "PHL_2016", "USA_2008", 
"ESP_1996", "NOR_2001", "UKR_1998", "HUN_1998", "HUN_1998", "CAN_2011", 
"JPN_2004", "TUR_2015", "LVA_2011", "TWN_2001", "IRL_2002", "CHL_2005", 
"NZL_2014", "PHL_2016", "CZE_2010", "JPN_2004", "ISL_2013", "NLD_2006", 
"NLD_2010", "KOR_2000", "HKG_2000", "PER_2001", "ROU_2004", "NZL_2002", 
"PER_2016", "URY_2009", "USA_2004", "CAN_2015", "ALB_2005", "MEX_2012", 
"SWE_2006", "GBR_2015", "ESP_2000", "SVN_2008", "CHE_2007", "BRA_2002", 
"TUR_2011", "BGR_2014", "CZE_2013", "FIN_2011", "NOR_1997", "IRL_2011", 
"CAN_2004", "USA_2008", "ISL_2009", "ARG_2015", "CHE_2011", "IRL_2002", 
"NOR_2005", "URY_2009", "USA_2004", "NZL_1996", "RUS_1999", "ISR_2013", 
"AUS_2004", "NZL_2008", "FIN_2007", "CAN_2015", "CAN_2011", "NZL_2011", 
"TWN_2001", "MEX_2009", "HKG_2008", "NZL_1996", "KOR_2000", "JPN_2007", 
"CAN_2008", "HUN_1998", "MEX_2006", "CZE_2010", "BGR_2014", "PRT_2005", 
"AUS_2013", "SVN_2004", "SWE_2014", "BEL_2003", "NOR_2001", "PER_2011", 
"IRL_2007", "ISL_2009", "ARG_2015", "KOR_2000", "GRC_2012", "THA_2011", 
"CZE_2006", "GBR_1997", "BRA_2006", "ROU_2004", "CAN_2015", "KOR_2004"
), cong_LH_all = c(-4999996.5, -4999996, -9999990, NA, -1, -4999995, 
NA, -4999995.5, -4999996.5, -2, -4999995, 0, -4999997, -4999997.5, 
-9999991, -9999987, -4999996, NA, -4999997, NA, -4999996.5, -4999997, 
-4999995, NA, NA, -9999995, -4999997.5, NA, -4999996.5, -4999993, 
-9999993, -4999997, -4999995, -9999988, NA, NA, -4999996.5, 0, 
-9999993, -9999985, -4999995.5, -4999998, NA, NA, NA, -9999989, 
NA, -4999995, -4999995.5, NA, -4999994.5, -4999995, -7, -9999983, 
-9999991, NA, -4999995, -4999994.5, -4999994, -4999997.5, -4999994.5, 
-4999995, -9999986.5, NA, -9999990, -4999995.5, -4999994.5, -4999997.5, 
0, -4999994.5, NA, -4999995.5, -9999990, -9999992, -4999996, 
NA, -4999997, -4999997.5, -9999992, -4999993.5, -7889997.5, -9999989.5, 
-4999993.5, -4999997.5, -9999989, NA, NA, -4999996, -9999989.5, 
-4999992.5, -4999995, -2, -8, -4999996, -4999995, -2, -4999996.5, 
NA, NA, NA, NA, NA, -4999996, -2.5, NA, -9999990, -4999995, -1, 
-4999997.5, -4999994, -4999997.5, -9999991, -4999995, -9999991.5, 
-4999996, -9999985, -4999996.5, -4999997, -9999992, -9999991, 
-9999990, -4999994, NA, NA, -4999997.5, -9999990, NA, -8210003.5, 
NA, NA), exp_cong_LH_all = c(-9999991, -4999996, -4999995.5, 
NA, -2, -4999995.5, NA, -4999995.5, -4999996.5, -2.5, -4999996, 
-5, -4999996.5, -4999996.5, -9999991, -9999987, -4999997, NA, 
-4999996.5, NA, -4999996, -4999997.5, -4999997, NA, NA, -9999995, 
-4999998, NA, -4999995.5, -4999994.5, -9999993, -4999996, -4999994.5, 
-9999988, NA, NA, -4999997, -1, -9999993, -9999985, -4999994.5, 
-4999995.5, -7, NA, NA, -4999996, NA, -4999995.5, -4999994.5, 
NA, -4999994.5, -4999994, -1, -9999983, -9999991, NA, -4999995.5, 
-4999994, -4999994, -4999999, -4999996, -9999990, -9999986.5, 
NA, -4999996.5, -4999996, -4999996, -4999996, -3.5, -4999994.5, 
NA, -4999996, -9999990, -9999992, -4999996.5, NA, -4999998, -4999999, 
-9999992, -4999994.5, -7889997.5, -9999989.5, -4999995, -4999996.5, 
-9999989, NA, NA, -4999996.5, -9999989.5, -4999993.5, -4999994, 
-4, -3, -4999996, -4999995.5, -1, -4999996.5, NA, NA, -1, NA, 
NA, -4999996.5, -2.5, NA, -9999990, -4999996, 0, -4999996, -4999994, 
-4999999, -9999991, -4999995.5, -9999991.5, -4999996.5, -4999994.5, 
-4999996.5, -4999997, -9999992, -9999991, -4999995.5, -4999994.5, 
NA, NA, -4999998, -9999990, NA, -8210003.5, NA, NA), meanv_cong_LH_all = c(-9999991, 
-4999995.80586283, -4999996.01771117, NA, -1.35323866282033, 
-4999995.50472973, NA, -4999995.27892562, -4999996.22057143, 
-1.80247213606773, -4999994.89092873, -3.12671232876712, -4999996.73983096, 
-4999996.592711, -9999991, -9999987, -4999996.59302922, NA, -4999996.48278034, 
NA, -4999995.67434421, -4999997.98937865, -4999995.6932351, NaN, 
NA, -9999995, -4999998.14614499, NA, -4999995.9920279, -4999994.09825175, 
-9999993, -4999996.26961679, -4999994.98146718, -9999988, NA, 
NA, -4999996.820478, -0.825446568473833, -9999993, -9999985, 
-4999995.41772152, -4999995.72854134, NaN, NaN, NaN, -4999995.78947368, 
NA, -4999996.48796389, -4999994.64504881, NA, -4999994.53212851, 
-4999993.34686347, -2.08479532163743, -9999983, -9999991, NA, 
-4999995.38550247, -4999994.09418932, -4999993.83119219, -4999998.33127109, 
-4999994.83909574, -9999990, -9999986.5, NA, NaN, -4999996.33812155, 
-4999995.45712695, -4999996.19116398, -1.70591908091908, -4999994.74283207, 
NA, -4999994.86858407, -9999990, -9999992, -4999996.55981541, 
NA, -4999997.44994438, -4999998.0552546, -9999992, -4999994.48720137, 
-7889997.5, -9999989.5, -4999994.96545455, -4999995.10490463, 
-9999989, NA, NaN, -4999995.68173015, -9999989.5, -4999993.66206897, 
-4999994.15760266, -3.02943425076453, -4.87328767123288, -4999996.23019518, 
-4999995.34304775, -1.33935018050542, -4999996.56598361, NA, 
NA, -0.709143968871595, NA, NA, -4999995.17271157, -2.24531186956095, 
NA, -9999990, -4999995.65860401, -0.523975588491718, -4999996.05606061, 
-4999993.63230951, -4999998.0552546, -9999991, -4999995.46795612, 
-9999991.5, -4999996.36323155, NaN, -4999996.11256544, -4999995.59288194, 
-9999992, -9999991, NaN, -4999994.77942857, NA, NA, -4999998.02211435, 
-9999990, NA, -8210003.5, NA, NaN), voter_exp_dif_LH_all = c(0, 
0, 0, 0, 1, 0.5, 0, 2, 0, 4.5, 1, 5, 0.5, 1, 0, 0, 1, 0, 0.5, 
0, 0.5, 0.5, 2, NA, 0, 0, 0.5, 0, 2, 1.5, 0, 1, 0.5, 0, 0, 0, 
0.5, 1, 0, 0, 1, 2.5, NA, NA, NA, 0, 0, 0.5, 2, 0, 0, 1, 6, 0, 
0, 0, 0.5, 0.5, 1, 1.5, 1.5, 0, 0, 4, 0, 0.5, 2.5, 1.5, 3.5, 
2, 0, 0.5, 0, 0, 0.5, 0, 1, 1.5, 0, 1, 0, 0, 1.5, 4, 0, 0, NA, 
1.5, 0, 1, 1, 2, 5, 0, 0.5, 3, 0, 0, 0, NA, 0, 0, 2.5, 1, 2.5, 
0, 1, 1, 1.5, 0, 1.5, 0, 0.5, 0, 0.5, 0, 0, 0, 0, 0, 0, 0.5, 
0, NA, 0.5, 0, 0, 0, 0, NA), voter_meanv_dif_LH_PL = c(0.234183673469388, 
0.388274336283186, 0, 0, 0.179138321995465, 1.00945945945946, 
0, 0, 0, 4.12891986062718, 0.218142548596112, 3.12671232876712, 
1.4796619123085, 1.81457800511509, NaN, 0, 0, 0, 0, 0, 0, 1.97875730217738, 
0, NaN, NA, NaN, 1.29228998849252, 0, 0, 0, NaN, 4.53923357664234, 
0.0370656370656368, 0, 0, 0, 0.640956002172732, 0.825446568473833, 
0, 0, 0.164556962025316, 4.54291732490785, NaN, NaN, NaN, 0, 
0, 2.97592778335005, 1.70990237099024, 0, 0, 3.30627306273063, 
4.91520467836257, 0, 0, 0, 0.771004942339374, 0.811621368322399, 
1.66238437821172, 0, 0.678191489361702, 0.101661779081134, 0, 
6.04143862392494, NaN, 1.67624309392265, 0, 0, 2.25, 0, 0, 0, 
NaN, NaN, 1.11963081292155, 0, 0.899888765294772, 1.11050920910076, 
0, 1.97440273037543, 0, 0, 0, 0, 0, NA, NaN, 0, 0, 2.32413793103448, 
0, 1.02943425076453, 3.12671232876712, 0.460390355912744, 0, 
3.33935018050542, 0.131967213114754, 0, 0, NA, 0, 0, 0, 0.316070307595731, 
0, 0, 0, 1.52397558849172, 0, 0.735380980507975, 1.11050920910076, 
0, 0, 0, 0.7264631043257, NaN, 0.774869109947644, 2.81423611111111, 
0, 0, NaN, 0, NaN, NA, 1.04422869471413, 0, 0, 0, 0, NaN), cong_closest = c(-1, 
-1, NA, NA, 0, 0, NA, -1, -2, -2, 0, 0, 0, 0, -2, 0, 0, NA, -2, 
NA, -2, 0, 0, 0, NA, -1, 0, NA, 0, 0, -1, -3, 0, 0, NA, NA, -1, 
0, 0, 0, -1, 0, 0, 0, -1, NA, NA, 0, 0, NA, 0, 0, 0, 0, 0, NA, 
0, 0, -1, -2, 0, 0, -1, NA, 0, 0, -1, -2, 0, -2, NA, -1, -1, 
0, 0, NA, 0, 0, 0, 0, 0, -1, 0, -5, 0, NA, NA, -1, -1, 0, -4, 
0, 0, -1, 0, 0, 0, NA, NA, NA, NA, NA, 0, 0, NA, 0, 0, -1, -5, 
-1, 0, NA, 0, -1, 0, NA, 0, 0, -1, NA, 0, 0, NA, NA, 0, -1, NA, 
0, NA, 0), cong_closest_exp = c(NA, 0, -1, NA, -1, -1, NA, -1, 
-1, 0, 0, -2, 0, -1, 0, -1, 0, NA, 0, NA, -1, -1, -2, 0, NA, 
-3, -1, NA, -1, -3, 0, 0, -1, -1, NA, NA, 0, 0, 0, -2, 0, 0, 
-1, -2, 0, -1, NA, -1, 0, NA, 0, -1, -1, -3, 0, NA, 0, 0, -1, 
-1, 0, NA, -1, NA, 0, 0, 0, 0, -3, -1, NA, -1, NA, 0, 0, NA, 
-2, -3, -1, -1, NA, -1, -1, -3, 0, NA, 0, 0, -1, -2, -2, -1, 
-1, 0, 0, 0, 0, NA, NA, -1, NA, NA, 0, -1, NA, 0, 0, 0, 0, -1, 
-3, 0, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, NA, NA, -1, -1, NA, -1, 
NA, 0), cong_closest_meanv = c(-0.146556188503366, -0.378645833333334, 
-0.594634873323398, NaN, -0.5273390036452, -0.106280193236715, 
NA, -0.202380952380953, -0.213114754098361, -1.01231310466139, 
-0.679217958001448, -1.90327613104524, -0.365552699228792, -1.14507772020725, 
-0.200808625336927, -1.55851358846367, -0.429164504411002, NaN, 
-0.965560688786224, NaN, -0.348688419705694, -0.222043010752688, 
-0.651311580294306, -0.297255706591434, NA, -3.60599078341014, 
-2.02916666666667, NaN, -0.394220283533261, -2.1965034965035, 
-0.217857142857143, -0.306077348066298, -0.218142548596112, -0.0354223433242504, 
NaN, NA, -0.535480349344978, -0.323232323232323, -0.33271719038817, 
-4.34629929767693, -0.237392373923739, -0.438716465018412, -1.80363636363636, 
-0.930591259640103, -1.01231310466139, -0.949536560247168, NaN, 
-2.12789415656009, -0.0535475234270413, NaN, -0.0642570281124497, 
-0.965105601469238, -2.08479532163743, -3.90884955752212, -0.355878634639697, 
NaN, -0.506517690875233, -0.188378631677601, -0.2328125, -2.21311475409836, 
-0.0964705882352943, -0.0314341846758346, -0.135678391959799, 
NA, -0.0370656370656368, -0.98132183908046, -0.315205327413985, 
-0.149914821124361, -1.16183816183816, -0.949636261891438, NA, 
-0.73716814159292, -0.539143279172821, -0.0751104565537557, -0.202718006795017, 
NA, -0.899888765294772, -1.11050920910076, -0.841677943166441, 
-0.136026380873867, -0.2, -0.196503496503497, -1.7219730941704, 
-0.209809264305177, -0.528112449799197, NA, -0.698352344740178, 
-0.363460296965784, -0.852008456659619, -2.20134983127109, -2.31520532741398, 
-0.437139952558455, -0.107908351810791, -0.0410958904109586, 
-0.0642750373692076, -0.255223880597015, -0.131967213114754, 
NA, NA, -0.709143968871595, NA, NA, -0.260299625468165, -0.0294342507645258, 
NaN, -0.514563106796117, -0.000625390869293696, -0.128919860627177, 
-0.276595744680851, -0.106004901960785, -1.11050920910076, -0.321428571428571, 
-0.935912240184757, -0.343023255813954, -0.625348189415042, NaN, 
-0.225130890052356, -0.279111111111111, -0.130154639175258, -0.528112449799197, 
-0.087478559176672, -0.442857142857143, NA, NA, -1.04422869471413, 
-0.28648892905558, NA, -0.135678391959799, NA, -0.161317567567568
), dif_cls_LH_all = c(4999995.5, 4999995, NA, NA, 1, 4999995, 
NA, 4999994.5, 4999994.5, 0, 4999995, 0, 4999997, 4999997.5, 
9999989, 9999987, 4999996, NA, 4999995, NA, 4999994.5, 4999997, 
4999995, NA, NA, 9999994, 4999997.5, NA, 4999996.5, 4999993, 
9999992, 4999994, 4999995, 9999988, NA, NA, 4999995.5, 0, 9999993, 
9999985, 4999994.5, 4999998, NA, NA, NA, NA, NA, 4999995, 4999995.5, 
NA, 4999994.5, 4999995, 7, 9999983, 9999991, NA, 4999995, 4999994.5, 
4999993, 4999995.5, 4999994.5, 4999995, 9999985.5, NA, 9999990, 
4999995.5, 4999993.5, 4999995.5, 0, 4999992.5, NA, 4999994.5, 
9999989, 9999992, 4999996, NA, 4999997, 4999997.5, 9999992, 4999993.5, 
7889997.5, 9999988.5, 4999993.5, 4999992.5, 9999989, NA, NA, 
4999995, 9999988.5, 4999992.5, 4999991, 2, 8, 4999995, 4999995, 
2, 4999996.5, NA, NA, NA, NA, NA, 4999996, 2.5, NA, 9999990, 
4999995, 0, 4999992.5, 4999993, 4999997.5, NA, 4999995, 9999990.5, 
4999996, NA, 4999996.5, 4999997, 9999991, NA, 9999990, 4999994, 
NA, NA, 4999997.5, 9999989, NA, 8210003.5, NA, NA), knowledge_adj = c(1, 
NaN, 1, 0, 0.75, 1, 0.75, 1, NaN, 0.666666666666667, NaN, NaN, 
0.333333333333333, 0.5, 0.333333333333333, 0.333333333333333, 
1, 0.333333333333333, 1, NaN, 1, NaN, 0.333333333333333, 0.25, 
0.5, 0.5, 0.666666666666667, 0, 0.666666666666667, 1, 0.75, 0, 
NaN, 0.333333333333333, 0.666666666666667, 0, NaN, 1, 0, 0, 0.333333333333333, 
0.666666666666667, 0.333333333333333, 0.666666666666667, 0, 1, 
0, 0.75, 0.666666666666667, 0.333333333333333, 1, 0.5, 0.5, 0, 
0.666666666666667, 1, 0.5, 0.333333333333333, 0.333333333333333, 
NaN, 0.333333333333333, NaN, 1, 0.333333333333333, NaN, NaN, 
0, 1, 0.666666666666667, 0.25, NaN, 1, 0.333333333333333, NaN, 
1, 0.333333333333333, NaN, 0.25, 0.5, 0.333333333333333, 1, 0.666666666666667, 
0.666666666666667, 0, 0.666666666666667, 0, 0, 1, 0, NaN, 1, 
1, NaN, 0.5, 0.666666666666667, 0, 1, NaN, NaN, 1, 0, 0.666666666666667, 
0.333333333333333, 0.666666666666667, NaN, 0.666666666666667, 
NaN, 0.666666666666667, 1, 0.333333333333333, 0.25, 0.666666666666667, 
0, 1, 0.75, 0.666666666666667, 0.666666666666667, 0.333333333333333, 
1, 0.666666666666667, 0.5, NaN, 0.5, 0, 0.666666666666667, 0, 
0.666666666666667, 1, NaN, 1)), row.names = c(NA, -130L), class = "data.frame")

编辑:摘录了此处毫无意义的“选择变量”的摘录,并更正了组变量名称。

标签: rdplyr

解决方案


您的代码和数据中使用的变量不匹配,但通常您可以使用:

library(dplyr)
  
df %>% 
  select(var1:var10)  %>%  
  group_by(var1, var2) %>% 
  summarise(across(.fns = list(min=~min(., na.rm = TRUE), 
                               Q1=~quantile(., probs = 0.25),
                               median=median(., na.rm = TRUE), 
                               Q3=~quantile(., probs = 0.75),
                               max=~max(., na.rm = TRUE))))

推荐阅读