首页 > 解决方案 > 将 NA 计为每个变量中的一个因子水平

问题描述

我有按组计算值频率的代码(见下文)。

 # Inject missing values into three mtcars columns so that there are NAs to count.
    mtcars[4:9, "disp"] <- NA
    mtcars[9:12, "hp"] <- NA
    mtcars[10:12, "mpg"] <- NA

    # Positions of the mtcars columns to tabulate (columns 2 to 4).
    varList <- 2:4
    # For every selected column, cross-tabulate its values (rows, plus a total
    # row from `+ 1`) against the grouping column, reporting the count `n` and
    # the column percentage for each group.
    lapply(
      varList,
      function(col_idx, dat, group_var) {
        tabular(
          (Factor(dat[[col_idx]], paste(colnames(dat)[col_idx])) + 1) ~
            ((Factor(dat[[group_var]], paste(group_var))) *
               ((n = 1) + Percent("col"))),
          data = dat
        )
      },
      mtcars,
      "cyl"
    )

我想调整代码,以便它也计算每个变量中的 NA 数量。

例如,如果 region 变量中存在缺失值,我希望输出如下所示(region 中实际上并没有 NA,所以下面 NA 这一行的数据是我虚构的,仅作示意):

           sexe                                
                            F            M           O          
 region                     n    Percent n   Percent n   Percent
 Alsace                       6    3.922   3   1.515   6   3.704
 Aquitaine                    0    0.000   6   3.030   6   3.704
 Auvergne                     6    3.922   6   3.030   3   1.852
 Bourgogne                    3    1.961   3   1.515   0   0.000
 Bretagne                     3    1.961   0   0.000  12   7.407
 Centre                      15    9.804  21  10.606  21  12.963
 Champagne-Ardenne            3    1.961   0   0.000   6   3.704
 Corse                        6    3.922   9   4.545   3   1.852
 Franche-Comté                9    5.882  15   7.576  12   7.407
 Haute-Normandie              3    1.961   0   0.000   0   0.000
 Île-de-France                0    0.000   0   0.000   0   0.000
 Languedoc-Roussillon         6    3.922  15   7.576   3   1.852
 Lorraine                     3    1.961   6   3.030   0   0.000
 Midi-Pyrénées               15    9.804   0   0.000   0   0.000
 Nord-Pas-de-Calais           3    1.961   9   4.545   6   3.704
 Pays de la Loire             0    0.000   0   0.000   3   1.852
 Picardie                     6    3.922   9   4.545  12   7.407
 Poitou-Charentes            12    7.843   6   3.030  12   7.407
 Provence-Alpes-Côte d'Azur   6    3.922   0   0.000   3   1.852
 Rhône-Alpes                 12    7.843  30  15.152  15   9.259
 **NA                          5     4.6    5    5       5    5**
 All                        153  100.000 198 100.000 162 100.000

任何帮助表示赞赏!

标签: r tidyverse

解决方案


基于 tables 包,一个选项是使用 exclude = NULL 将变量转换为 factor,这样 NA 会被保留为一个因子水平,从而在表中被计数

library(tables)
# Blank out rows 4-7 of `cyl`, then convert the column to a factor.
# `exclude = NULL` keeps NA as a level of its own instead of dropping it.
mtcars$cyl <- factor(replace(mtcars$cyl, 4:7, NA), exclude = NULL)

- 运行 OP 的代码

# Positions of the mtcars columns to tabulate: 2 = cyl, 3 = disp, 4 = hp.
varList <- 2:4
# For every selected column, cross-tabulate its values (rows, plus an "All"
# row from `+ 1`) against the grouping column, reporting the count `n` and
# the column percentage for each group.
lapply(
  varList,
  function(col_idx, dat, group_var) {
    tabular(
      (Factor(dat[[col_idx]], paste(colnames(dat)[col_idx])) + 1) ~
        ((Factor(dat[[group_var]], paste(group_var))) *
           ((n = 1) + Percent("col"))),
      data = dat
    )
  },
  mtcars,
  "cyl"
)
#[[1]]
                                                
#     cyl                                        
#     4           6         8          NA        
# cyl n   Percent n Percent n  Percent n  Percent
# 4   11  100     0   0      0   0     0    0    
# 6    0    0     5 100      0   0     0    0    
# 8    0    0     0   0     12 100     0    0    
# NA   0    0     0   0      0   0     4  100    
# All 11  100     5 100     12 100     4  100    

#[[2]]
                                                  
#       cyl                                        
#       4           6         8          NA        
# disp  n   Percent n Percent n  Percent n  Percent
# 71.1   1    9.091 0   0      0   0.000 0    0    
# 75.7   1    9.091 0   0      0   0.000 0    0    
# 78.7   1    9.091 0   0      0   0.000 0    0    
# 79     1    9.091 0   0      0   0.000 0    0    
# 95.1   1    9.091 0   0      0   0.000 0    0    
# 108    1    9.091 0   0      0   0.000 0    0    
# 120.1  1    9.091 0   0      0   0.000 0    0    
# 120.3  1    9.091 0   0      0   0.000 0    0    
# 121    1    9.091 0   0      0   0.000 0    0    
# 140.8  1    9.091 0   0      0   0.000 0    0    
# 145    0    0.000 1  20      0   0.000 0    0    
# 146.7  1    9.091 0   0      0   0.000 0    0    
# 160    0    0.000 2  40      0   0.000 0    0    
# 167.6  0    0.000 2  40      0   0.000 0    0    
# 225    0    0.000 0   0      0   0.000 1   25    
# 258    0    0.000 0   0      0   0.000 1   25    
# 275.8  0    0.000 0   0      3  25.000 0    0    
# 301    0    0.000 0   0      1   8.333 0    0    
# 304    0    0.000 0   0      1   8.333 0    0    
# 318    0    0.000 0   0      1   8.333 0    0    
# 350    0    0.000 0   0      1   8.333 0    0    
# 351    0    0.000 0   0      1   8.333 0    0    
# 360    0    0.000 0   0      0   0.000 2   50    
# 400    0    0.000 0   0      1   8.333 0    0    
# 440    0    0.000 0   0      1   8.333 0    0    
# 460    0    0.000 0   0      1   8.333 0    0    
# 472    0    0.000 0   0      1   8.333 0    0    
# All   11  100.000 5 100     12 100.000 4  100    

#[[3]]
                                                
#     cyl                                        
#     4           6         8          NA        
# hp  n   Percent n Percent n  Percent n  Percent
# 52   1    9.091 0   0      0   0.000 0    0    
# 62   1    9.091 0   0      0   0.000 0    0    
# 65   1    9.091 0   0      0   0.000 0    0    
# 66   2   18.182 0   0      0   0.000 0    0    
# 91   1    9.091 0   0      0   0.000 0    0    
# 93   1    9.091 0   0      0   0.000 0    0    
# 95   1    9.091 0   0      0   0.000 0    0    
# 97   1    9.091 0   0      0   0.000 0    0    
# 105  0    0.000 0   0      0   0.000 1   25    
# 109  1    9.091 0   0      0   0.000 0    0    
# 110  0    0.000 2  40      0   0.000 1   25    
# 113  1    9.091 0   0      0   0.000 0    0    
# 123  0    0.000 2  40      0   0.000 0    0    
# 150  0    0.000 0   0      2  16.667 0    0    
# 175  0    0.000 1  20      1   8.333 1   25    
# 180  0    0.000 0   0      3  25.000 0    0    
# 205  0    0.000 0   0      1   8.333 0    0    
# 215  0    0.000 0   0      1   8.333 0    0    
# 230  0    0.000 0   0      1   8.333 0    0    
# 245  0    0.000 0   0      1   8.333 1   25    
# 264  0    0.000 0   0      1   8.333 0    0    
# 335  0    0.000 0   0      1   8.333 0    0    
# All 11  100.000 5 100     12 100.000 4  100    

如果我们还需要把第 2 到 4 列中的 NA 值也计入表中(而不是被丢弃),则将这些列转换为 factor 并使用 exclude = NULL

# Convert each selected column to a factor; `exclude = NULL` keeps NA as a
# level of its own so missing values get their own row in the tables.
mtcars[varList] <- lapply(
  mtcars[varList],
  function(column) factor(column, exclude = NULL)
)

推荐阅读