首页 > 解决方案 > 如何识别具有多个分组的异常值

问题描述

我正在尝试识别数据集中 relabs 列的异常值,并按 Treatment 和 conc 分组计算;但对于 conc 列等于“NK”的行,还需要按 Control 列的值 1 和 2 分别计算(同样按 Treatment 分组)。

可复现示例(reprex)所用的数据集(经手动检查,应该有 40 个异常值):

df <- wrapr::build_frame(
   "ID"  , "Treatment", "conc"   , "relabs", "Control" |
     1   , "A"        , "100 µM" , 0.9161  , 0         |
     2   , "A"        , "100 µM" , 0.8023  , 0         |
     3   , "A"        , "100 µM" , 0.7189  , 0         |
     4   , "A"        , "100 µM" , 0.7234  , 0         |
     5   , "A"        , "100 µM" , 0.6265  , 0         |
     6   , "A"        , "100 µM" , 0.6237  , 0         |
     7   , "A"        , "100 µM" , 0.6859  , 0         |
     8   , "A"        , "100 µM" , 0.8011  , 0         |
     9   , "A"        , "10 µM"  , 1.115   , 0         |
     10  , "A"        , "10 µM"  , 0.8961  , 0         |
     11  , "A"        , "10 µM"  , 0.7925  , 0         |
     12  , "A"        , "10 µM"  , 0.7006  , 0         |
     13  , "A"        , "10 µM"  , 0.6719  , 0         |
     14  , "A"        , "10 µM"  , 0.7363  , 0         |
     15  , "A"        , "10 µM"  , 0.8165  , 0         |
     16  , "A"        , "10 µM"  , 0.8958  , 0         |
     17  , "A"        , "1 µM"   , 1.168   , 0         |
     18  , "A"        , "1 µM"   , 1.066   , 0         |
     19  , "A"        , "1 µM"   , 0.8298  , 0         |
     20  , "A"        , "1 µM"   , 0.9315  , 0         |
     21  , "A"        , "1 µM"   , 0.8067  , 0         |
     22  , "A"        , "1 µM"   , 0.9296  , 0         |
     23  , "A"        , "1 µM"   , 0.8462  , 0         |
     24  , "A"        , "1 µM"   , 1.133   , 0         |
     25  , "A"        , "NK"     , 1.216   , 1         |
     26  , "A"        , "NK"     , 0.9856  , 1         |
     27  , "A"        , "NK"     , 0.8862  , 1         |
     28  , "A"        , "NK"     , 0.9599  , 1         |
     29  , "A"        , "NK"     , 0.8789  , 1         |
     30  , "A"        , "NK"     , 0.9304  , 1         |
     31  , "A"        , "NK"     , 1.057   , 1         |
     32  , "A"        , "NK"     , 1.086   , 1         |
     33  , "A"        , "10 µM X", 0.9173  , 0         |
     34  , "A"        , "10 µM X", 0.7754  , 0         |
     35  , "A"        , "10 µM X", 0.575   , 0         |
     36  , "A"        , "10 µM X", 0.6619  , 0         |
     37  , "A"        , "10 µM X", 0.5609  , 0         |
     38  , "A"        , "10 µM X", 0.5786  , 0         |
     39  , "A"        , "10 µM X", 0.6024  , 0         |
     40  , "A"        , "10 µM X", 0.889   , 0         |
     41  , "B"        , "100 µM" , 0.8515  , 0         |
     42  , "B"        , "100 µM" , 0.6961  , 0         |
     43  , "B"        , "100 µM" , 0.6754  , 0         |
     44  , "B"        , "100 µM" , 0.6816  , 0         |
     45  , "B"        , "100 µM" , 0.7234  , 0         |
     46  , "B"        , "100 µM" , 0.6961  , 0         |
     47  , "B"        , "100 µM" , 0.609   , 0         |
     48  , "B"        , "100 µM" , 0.7092  , 0         |
     49  , "B"        , "10 µM"  , 0.9482  , 0         |
     50  , "B"        , "10 µM"  , 0.8245  , 0         |
     51  , "B"        , "10 µM"  , 0.8456  , 0         |
     52  , "B"        , "10 µM"  , 0.8181  , 0         |
     53  , "B"        , "10 µM"  , 0.7784  , 0         |
     54  , "B"        , "10 µM"  , 0.7393  , 0         |
     55  , "B"        , "10 µM"  , 0.8035  , 0         |
     56  , "B"        , "10 µM"  , 0.8296  , 0         |
     57  , "B"        , "1 µM"   , 1.237   , 0         |
     58  , "B"        , "1 µM"   , 1.081   , 0         |
     59  , "B"        , "1 µM"   , 0.987   , 0         |
     60  , "B"        , "1 µM"   , 0.8797  , 0         |
     61  , "B"        , "1 µM"   , 0.8077  , 0         |
     62  , "B"        , "1 µM"   , 0.8874  , 0         |
     63  , "B"        , "1 µM"   , 0.8942  , 0         |
     64  , "B"        , "1 µM"   , 1.157   , 0         |
     65  , "B"        , "NK"     , 1.216   , 1         |
     66  , "B"        , "NK"     , 0.9856  , 1         |
     67  , "B"        , "NK"     , 0.8862  , 1         |
     68  , "B"        , "NK"     , 0.9599  , 1         |
     69  , "B"        , "NK"     , 0.8789  , 1         |
     70  , "B"        , "NK"     , 0.9304  , 1         |
     71  , "B"        , "NK"     , 1.057   , 1         |
     72  , "B"        , "NK"     , 1.086   , 1         |
     73  , "B"        , "10 µM X", 0.9173  , 0         |
     74  , "B"        , "10 µM X", 0.7754  , 0         |
     75  , "B"        , "10 µM X", 0.575   , 0         |
     76  , "B"        , "10 µM X", 0.6619  , 0         |
     77  , "B"        , "10 µM X", 0.5609  , 0         |
     78  , "B"        , "10 µM X", 0.5786  , 0         |
     79  , "B"        , "10 µM X", 0.6024  , 0         |
     80  , "B"        , "10 µM X", 0.889   , 0         |
     81  , "C"        , "100 µM" , 0.8144  , 0         |
     82  , "C"        , "100 µM" , 0.7734  , 0         |
     83  , "C"        , "100 µM" , 0.8364  , 0         |
     84  , "C"        , "100 µM" , 0.613   , 0         |
     85  , "C"        , "100 µM" , 0.6863  , 0         |
     86  , "C"        , "100 µM" , 0.5953  , 0         |
     87  , "C"        , "100 µM" , 0.5552  , 0         |
     88  , "C"        , "100 µM" , 0.9572  , 0         |
     89  , "C"        , "10 µM"  , 1.256   , 0         |
     90  , "C"        , "10 µM"  , 1.035   , 0         |
     91  , "C"        , "10 µM"  , 0.9852  , 0         |
     92  , "C"        , "10 µM"  , 0.9186  , 0         |
     93  , "C"        , "10 µM"  , 0.907   , 0         |
     94  , "C"        , "10 µM"  , 0.9979  , 0         |
     95  , "C"        , "10 µM"  , 1.075   , 0         |
     96  , "C"        , "10 µM"  , 1.144   , 0         |
     97  , "C"        , "1 µM"   , 1.218   , 0         |
     98  , "C"        , "1 µM"   , 1.078   , 0         |
     99  , "C"        , "1 µM"   , 0.7224  , 0         |
     100 , "C"        , "1 µM"   , 0.9967  , 0         |
     101 , "C"        , "1 µM"   , 0.9748  , 0         |
     102 , "C"        , "1 µM"   , 0.6157  , 0         |
     103 , "C"        , "1 µM"   , 0.7437  , 0         |
     104 , "C"        , "1 µM"   , 1.191   , 0         |
     105 , "C"        , "NK"     , 1.317   , 1         |
     106 , "C"        , "NK"     , 0.9297  , 1         |
     107 , "C"        , "NK"     , 0.9184  , 1         |
     108 , "C"        , "NK"     , 0.8788  , 1         |
     109 , "C"        , "NK"     , 0.8719  , 1         |
     110 , "C"        , "NK"     , 0.9015  , 1         |
     111 , "C"        , "NK"     , 0.9266  , 1         |
     112 , "C"        , "NK"     , 1.256   , 1         |
     113 , "C"        , "10 µM X", 0.9173  , 0         |
     114 , "C"        , "10 µM X", 0.7754  , 0         |
     115 , "C"        , "10 µM X", 0.575   , 0         |
     116 , "C"        , "10 µM X", 0.6619  , 0         |
     117 , "C"        , "10 µM X", 0.5609  , 0         |
     118 , "C"        , "10 µM X", 0.5786  , 0         |
     119 , "C"        , "10 µM X", 0.6024  , 0         |
     120 , "C"        , "10 µM X", 0.889   , 0         |
     121 , "D"        , "100 µM" , 0.7008  , 0         |
     122 , "D"        , "100 µM" , 0.7397  , 0         |
     123 , "D"        , "100 µM" , 0.6957  , 0         |
     124 , "D"        , "100 µM" , 0.5245  , 0         |
     125 , "D"        , "100 µM" , 0.5118  , 0         |
     126 , "D"        , "100 µM" , 0.5568  , 0         |
     127 , "D"        , "100 µM" , 0.7559  , 0         |
     128 , "D"        , "100 µM" , 0.8191  , 0         |
     129 , "D"        , "10 µM"  , 1.056   , 0         |
     130 , "D"        , "10 µM"  , 0.8304  , 0         |
     131 , "D"        , "10 µM"  , 0.8152  , 0         |
     132 , "D"        , "10 µM"  , 0.709   , 0         |
     133 , "D"        , "10 µM"  , 0.7035  , 0         |
     134 , "D"        , "10 µM"  , 0.6735  , 0         |
     135 , "D"        , "10 µM"  , 0.7893  , 0         |
     136 , "D"        , "10 µM"  , 0.9783  , 0         |
     137 , "D"        , "1 µM"   , 1.294   , 0         |
     138 , "D"        , "1 µM"   , 0.9011  , 0         |
     139 , "D"        , "1 µM"   , 0.9489  , 0         |
     140 , "D"        , "1 µM"   , 0.7918  , 0         |
     141 , "D"        , "1 µM"   , 0.7772  , 0         |
     142 , "D"        , "1 µM"   , 0.759   , 0         |
     143 , "D"        , "1 µM"   , 0.8787  , 0         |
     144 , "D"        , "1 µM"   , 1.132   , 0         |
     145 , "D"        , "10 µM X", 0.9173  , 0         |
     146 , "D"        , "10 µM X", 0.7754  , 0         |
     147 , "D"        , "10 µM X", 0.575   , 0         |
     148 , "D"        , "10 µM X", 0.6619  , 0         |
     149 , "D"        , "10 µM X", 0.5609  , 0         |
     150 , "D"        , "10 µM X", 0.5786  , 0         |
     151 , "D"        , "10 µM X", 0.6024  , 0         |
     152 , "D"        , "10 µM X", 0.889   , 0         |
     153 , "D"        , "NK"     , 1.317   , 1         |
     154 , "D"        , "NK"     , 0.9297  , 1         |
     155 , "D"        , "NK"     , 0.9184  , 1         |
     156 , "D"        , "NK"     , 0.8788  , 1         |
     157 , "D"        , "NK"     , 0.8719  , 1         |
     158 , "D"        , "NK"     , 0.9015  , 1         |
     159 , "D"        , "NK"     , 0.9266  , 1         |
     160 , "D"        , "NK"     , 1.256   , 1         |
     161 , "E"        , "100 µM" , 0.9406  , 0         |
     162 , "E"        , "100 µM" , 0.7404  , 0         |
     163 , "E"        , "100 µM" , 0.7166  , 0         |
     164 , "E"        , "100 µM" , 0.5353  , 0         |
     165 , "E"        , "100 µM" , 0.6104  , 0         |
     166 , "E"        , "100 µM" , 0.555   , 0         |
     167 , "E"        , "100 µM" , 0.6529  , 0         |
     168 , "E"        , "100 µM" , 0.8834  , 0         |
     169 , "E"        , "10 µM"  , 0.9397  , 0         |
     170 , "E"        , "10 µM"  , 0.8172  , 0         |
     171 , "E"        , "10 µM"  , 0.7854  , 0         |
     172 , "E"        , "10 µM"  , 0.8047  , 0         |
     173 , "E"        , "10 µM"  , 0.7185  , 0         |
     174 , "E"        , "10 µM"  , 0.7277  , 0         |
     175 , "E"        , "10 µM"  , 0.852   , 0         |
     176 , "E"        , "10 µM"  , 0.9809  , 0         |
     177 , "E"        , "1 µM"   , 1.295   , 0         |
     178 , "E"        , "1 µM"   , 1.138   , 0         |
     179 , "E"        , "1 µM"   , 0.86    , 0         |
     180 , "E"        , "1 µM"   , 0.9272  , 0         |
     181 , "E"        , "1 µM"   , 0.8434  , 0         |
     182 , "E"        , "1 µM"   , 0.92    , 0         |
     183 , "E"        , "1 µM"   , 0.8568  , 0         |
     184 , "E"        , "1 µM"   , 1.184   , 0         |
     185 , "E"        , "10 µM X", 0.9173  , 0         |
     186 , "E"        , "10 µM X", 0.7754  , 0         |
     187 , "E"        , "10 µM X", 0.575   , 0         |
     188 , "E"        , "10 µM X", 0.6619  , 0         |
     189 , "E"        , "10 µM X", 0.5609  , 0         |
     190 , "E"        , "10 µM X", 0.5786  , 0         |
     191 , "E"        , "10 µM X", 0.6024  , 0         |
     192 , "E"        , "10 µM X", 0.889   , 0         |
     193 , "E"        , "NK"     , 1.317   , 1         |
     194 , "E"        , "NK"     , 0.9297  , 1         |
     195 , "E"        , "NK"     , 0.9184  , 1         |
     196 , "E"        , "NK"     , 0.8788  , 1         |
     197 , "E"        , "NK"     , 0.8719  , 1         |
     198 , "E"        , "NK"     , 0.9015  , 1         |
     199 , "E"        , "NK"     , 0.9266  , 1         |
     200 , "E"        , "NK"     , 1.256   , 1         |
     201 , "A"        , "NK"     , 1.317   , 2         |
     202 , "A"        , "NK"     , 0.9297  , 2         |
     203 , "A"        , "NK"     , 0.9184  , 2         |
     204 , "A"        , "NK"     , 0.8788  , 2         |
     205 , "A"        , "NK"     , 0.8719  , 2         |
     206 , "A"        , "NK"     , 0.9015  , 2         |
     207 , "A"        , "NK"     , 0.9266  , 2         |
     208 , "A"        , "NK"     , 1.256   , 2         |
     209 , "B"        , "NK"     , 1.317   , 2         |
     210 , "B"        , "NK"     , 0.9297  , 2         |
     211 , "B"        , "NK"     , 0.9184  , 2         |
     212 , "B"        , "NK"     , 0.8788  , 2         |
     213 , "B"        , "NK"     , 0.8719  , 2         |
     214 , "B"        , "NK"     , 0.9015  , 2         |
     215 , "B"        , "NK"     , 0.9266  , 2         |
     216 , "B"        , "NK"     , 1.256   , 2         |
     217 , "C"        , "NK"     , 1.216   , 2         |
     218 , "C"        , "NK"     , 0.9856  , 2         |
     219 , "C"        , "NK"     , 0.8862  , 2         |
     220 , "C"        , "NK"     , 0.9599  , 2         |
     221 , "C"        , "NK"     , 0.8789  , 2         |
     222 , "C"        , "NK"     , 0.9304  , 2         |
     223 , "C"        , "NK"     , 1.057   , 2         |
     224 , "C"        , "NK"     , 1.086   , 2         |
     225 , "D"        , "NK"     , 1.216   , 2         |
     226 , "D"        , "NK"     , 0.9856  , 2         |
     227 , "D"        , "NK"     , 0.8862  , 2         |
     228 , "D"        , "NK"     , 0.9599  , 2         |
     229 , "D"        , "NK"     , 0.8789  , 2         |
     230 , "D"        , "NK"     , 0.9304  , 2         |
     231 , "D"        , "NK"     , 1.057   , 2         |
     232 , "D"        , "NK"     , 1.086   , 2         |
     233 , "E"        , "NK"     , 1.216   , 2         |
     234 , "E"        , "NK"     , 0.9856  , 2         |
     235 , "E"        , "NK"     , 0.8862  , 2         |
     236 , "E"        , "NK"     , 0.9599  , 2         |
     237 , "E"        , "NK"     , 0.8789  , 2         |
     238 , "E"        , "NK"     , 0.9304  , 2         |
     239 , "E"        , "NK"     , 1.057   , 2         |
     240 , "E"        , "NK"     , 1.086   , 2         )

我应该得到的数据集:

df <- wrapr::build_frame(
   "ID"  , "Treatment", "conc"   , "relabs" |
     1   , "A"        , "100 µM" , 0.9869   |
     2   , "A"        , "100 µM" , 0.8644   |
     3   , "A"        , "100 µM" , 0.7745   |
     4   , "A"        , "100 µM" , 0.7793   |
     5   , "A"        , "100 µM" , 0.675    |
     6   , "A"        , "100 µM" , 0.6719   |
     7   , "A"        , "100 µM" , 0.739    |
     8   , "A"        , "100 µM" , 0.8631   |
     9   , "A"        , "10 µM"  , 0.9654   |
     10  , "A"        , "10 µM"  , 0.8538   |
     11  , "A"        , "10 µM"  , 0.7548   |
     12  , "A"        , "10 µM"  , 0.7239   |
     13  , "A"        , "10 µM"  , 0.7933   |
     14  , "A"        , "10 µM"  , 0.8797   |
     15  , "A"        , "10 µM"  , 0.9651   |
     16  , "A"        , "1 µM"   , 0.894    |
     17  , "A"        , "1 µM"   , 1.004    |
     18  , "A"        , "1 µM"   , 0.8691   |
     19  , "A"        , "1 µM"   , 1.002    |
     20  , "A"        , "1 µM"   , 0.9117   |
     21  , "A"        , "10 µM X", 1.014    |
     22  , "A"        , "10 µM X", 0.8573   |
     23  , "A"        , "10 µM X", 0.6358   |
     24  , "A"        , "10 µM X", 0.7318   |
     25  , "A"        , "10 µM X", 0.6201   |
     26  , "A"        , "10 µM X", 0.6397   |
     27  , "A"        , "10 µM X", 0.666    |
     28  , "A"        , "10 µM X", 0.9829   |
     29  , "A"        , "NK"     , 1.062    |
     30  , "A"        , "NK"     , 0.9548   |
     31  , "A"        , "NK"     , 1.034    |
     32  , "A"        , "NK"     , 0.9469   |
     33  , "A"        , "NK"     , 1.002    |
     34  , "A"        , "NK"     , 1.028    |
     35  , "A"        , "NK"     , 1.015    |
     36  , "A"        , "NK"     , 0.9716   |
     37  , "A"        , "NK"     , 0.964    |
     38  , "A"        , "NK"     , 0.9967   |
     39  , "A"        , "NK"     , 1.024    |
     40  , "B"        , "100 µM" , 0.9174   |
     41  , "B"        , "100 µM" , 0.75     |
     42  , "B"        , "100 µM" , 0.7277   |
     43  , "B"        , "100 µM" , 0.7344   |
     44  , "B"        , "100 µM" , 0.7794   |
     45  , "B"        , "100 µM" , 0.7499   |
     46  , "B"        , "100 µM" , 0.6561   |
     47  , "B"        , "100 µM" , 0.7641   |
     48  , "B"        , "10 µM"  , 1.022    |
     49  , "B"        , "10 µM"  , 0.8883   |
     50  , "B"        , "10 µM"  , 0.911    |
     51  , "B"        , "10 µM"  , 0.8814   |
     52  , "B"        , "10 µM"  , 0.8387   |
     53  , "B"        , "10 µM"  , 0.7965   |
     54  , "B"        , "10 µM"  , 0.8656   |
     55  , "B"        , "10 µM"  , 0.8938   |
     56  , "B"        , "1 µM"   , 1.063    |
     57  , "B"        , "1 µM"   , 0.9478   |
     58  , "B"        , "1 µM"   , 0.8702   |
     59  , "B"        , "1 µM"   , 0.9561   |
     60  , "B"        , "1 µM"   , 0.9634   |
     61  , "B"        , "NK"     , 1.062    |
     62  , "B"        , "NK"     , 0.9548   |
     63  , "B"        , "NK"     , 1.034    |
     64  , "B"        , "NK"     , 0.9469   |
     65  , "B"        , "NK"     , 1.002    |
     66  , "B"        , "NK"     , 1.028    |
     67  , "B"        , "NK"     , 1.015    |
     68  , "B"        , "NK"     , 0.9716   |
     69  , "B"        , "NK"     , 0.964    |
     70  , "B"        , "NK"     , 0.9967   |
     71  , "B"        , "NK"     , 1.024    |
     72  , "B"        , "10 µM X", 1.014    |
     73  , "B"        , "10 µM X", 0.8573   |
     74  , "B"        , "10 µM X", 0.6358   |
     75  , "B"        , "10 µM X", 0.7318   |
     76  , "B"        , "10 µM X", 0.6201   |
     77  , "B"        , "10 µM X", 0.6397   |
     78  , "B"        , "10 µM X", 0.666    |
     79  , "B"        , "10 µM X", 0.9829   |
     80  , "C"        , "100 µM" , 0.9005   |
     81  , "C"        , "100 µM" , 0.8551   |
     82  , "C"        , "100 µM" , 0.9248   |
     83  , "C"        , "100 µM" , 0.6778   |
     84  , "C"        , "100 µM" , 0.7588   |
     85  , "C"        , "100 µM" , 0.6582   |
     86  , "C"        , "100 µM" , 0.6138   |
     87  , "C"        , "100 µM" , 1.058    |
     88  , "C"        , "10 µM"  , 1.144    |
     89  , "C"        , "10 µM"  , 1.089    |
     90  , "C"        , "10 µM"  , 1.016    |
     91  , "C"        , "10 µM"  , 1.003    |
     92  , "C"        , "10 µM"  , 1.103    |
     93  , "C"        , "10 µM"  , 1.188    |
     94  , "C"        , "1 µM"   , 1.192    |
     95  , "C"        , "1 µM"   , 0.7987   |
     96  , "C"        , "1 µM"   , 1.102    |
     97  , "C"        , "1 µM"   , 1.078    |
     98  , "C"        , "1 µM"   , 0.6808   |
     99  , "C"        , "1 µM"   , 0.8223   |
     100 , "C"        , "NK"     , 1.028    |
     101 , "C"        , "NK"     , 1.015    |
     102 , "C"        , "NK"     , 0.9716   |
     103 , "C"        , "NK"     , 0.964    |
     104 , "C"        , "NK"     , 0.9967   |
     105 , "C"        , "NK"     , 1.024    |
     106 , "C"        , "NK"     , 1.062    |
     107 , "C"        , "NK"     , 0.9548   |
     108 , "C"        , "NK"     , 1.034    |
     109 , "C"        , "NK"     , 0.9469   |
     110 , "C"        , "NK"     , 1.002    |
     111 , "C"        , "10 µM X", 1.014    |
     112 , "C"        , "10 µM X", 0.8573   |
     113 , "C"        , "10 µM X", 0.6358   |
     114 , "C"        , "10 µM X", 0.7318   |
     115 , "C"        , "10 µM X", 0.6201   |
     116 , "C"        , "10 µM X", 0.6397   |
     117 , "C"        , "10 µM X", 0.666    |
     118 , "C"        , "10 µM X", 0.9829   |
     119 , "D"        , "100 µM" , 0.7748   |
     120 , "D"        , "100 µM" , 0.8178   |
     121 , "D"        , "100 µM" , 0.7692   |
     122 , "D"        , "100 µM" , 0.5798   |
     123 , "D"        , "100 µM" , 0.5658   |
     124 , "D"        , "100 µM" , 0.6157   |
     125 , "D"        , "100 µM" , 0.8357   |
     126 , "D"        , "100 µM" , 0.9056   |
     127 , "D"        , "10 µM"  , 1.168    |
     128 , "D"        , "10 µM"  , 0.9181   |
     129 , "D"        , "10 µM"  , 0.9013   |
     130 , "D"        , "10 µM"  , 0.7839   |
     131 , "D"        , "10 µM"  , 0.7778   |
     132 , "D"        , "10 µM"  , 0.7447   |
     133 , "D"        , "10 µM"  , 0.8727   |
     134 , "D"        , "10 µM"  , 1.082    |
     135 , "D"        , "1 µM"   , 0.9963   |
     136 , "D"        , "1 µM"   , 1.049    |
     137 , "D"        , "1 µM"   , 0.8755   |
     138 , "D"        , "1 µM"   , 0.8593   |
     139 , "D"        , "1 µM"   , 0.8392   |
     140 , "D"        , "1 µM"   , 0.9715   |
     141 , "D"        , "1 µM"   , 1.251    |
     142 , "D"        , "10 µM X", 1.014    |
     143 , "D"        , "10 µM X", 0.8573   |
     144 , "D"        , "10 µM X", 0.6358   |
     145 , "D"        , "10 µM X", 0.7318   |
     146 , "D"        , "10 µM X", 0.6201   |
     147 , "D"        , "10 µM X", 0.6397   |
     148 , "D"        , "10 µM X", 0.666    |
     149 , "D"        , "10 µM X", 0.9829   |
     150 , "D"        , "NK"     , 1.028    |
     151 , "D"        , "NK"     , 1.015    |
     152 , "D"        , "NK"     , 0.9716   |
     153 , "D"        , "NK"     , 0.964    |
     154 , "D"        , "NK"     , 0.9967   |
     155 , "D"        , "NK"     , 1.024    |
     156 , "D"        , "NK"     , 1.062    |
     157 , "D"        , "NK"     , 0.9548   |
     158 , "D"        , "NK"     , 1.034    |
     159 , "D"        , "NK"     , 0.9469   |
     160 , "D"        , "NK"     , 1.002    |
     161 , "E"        , "100 µM" , 1.04     |
     162 , "E"        , "100 µM" , 0.8186   |
     163 , "E"        , "100 µM" , 0.7923   |
     164 , "E"        , "100 µM" , 0.5918   |
     165 , "E"        , "100 µM" , 0.6749   |
     166 , "E"        , "100 µM" , 0.6136   |
     167 , "E"        , "100 µM" , 0.7218   |
     168 , "E"        , "100 µM" , 0.9768   |
     169 , "E"        , "10 µM"  , 1.039    |
     170 , "E"        , "10 µM"  , 0.9035   |
     171 , "E"        , "10 µM"  , 0.8684   |
     172 , "E"        , "10 µM"  , 0.8898   |
     173 , "E"        , "10 µM"  , 0.7944   |
     174 , "E"        , "10 µM"  , 0.8046   |
     175 , "E"        , "10 µM"  , 0.942    |
     176 , "E"        , "10 µM"  , 1.085    |
     177 , "E"        , "1 µM"   , 0.9508   |
     178 , "E"        , "1 µM"   , 1.025    |
     179 , "E"        , "1 µM"   , 0.9325   |
     180 , "E"        , "1 µM"   , 1.017    |
     181 , "E"        , "1 µM"   , 0.9473   |
     182 , "E"        , "10 µM X", 1.014    |
     183 , "E"        , "10 µM X", 0.8573   |
     184 , "E"        , "10 µM X", 0.6358   |
     185 , "E"        , "10 µM X", 0.7318   |
     186 , "E"        , "10 µM X", 0.6201   |
     187 , "E"        , "10 µM X", 0.6397   |
     188 , "E"        , "10 µM X", 0.666    |
     189 , "E"        , "10 µM X", 0.9829   |
     190 , "E"        , "NK"     , 1.028    |
     191 , "E"        , "NK"     , 1.015    |
     192 , "E"        , "NK"     , 0.9716   |
     193 , "E"        , "NK"     , 0.964    |
     194 , "E"        , "NK"     , 0.9967   |
     195 , "E"        , "NK"     , 1.024    |
     196 , "E"        , "NK"     , 1.062    |
     197 , "E"        , "NK"     , 0.9548   |
     198 , "E"        , "NK"     , 1.034    |
     199 , "E"        , "NK"     , 0.9469   |
     200 , "E"        , "NK"     , 1.002    )

我使用 identify_outliers 函数按 Treatment 和 conc 分组,它运行良好;但对于 conc 为“NK”的行,我还需要该函数按 Control 为“1”和“2”分别(而不是合并在一起)计算异常值,然后用 anti_join 排除所有异常值。

这是我当前的代码,它没有对 conc 为“NK”的行按 Control“1”和“2”分别计算异常值。identify_outliers 来自 rstatix 包。

# Flag outliers in `relabs` within each Treatment/conc group
# (identify_outliers() comes from rstatix). Control is not a grouping
# key here, so "NK" rows with Control 1 and 2 are evaluated together.
df_outliers <- identify_outliers(group_by(df, Treatment, conc), "relabs")

# Print the flagged rows.
df_outliers

我通过 anti_join 按如下方式排除异常值:

# Keep only the rows whose ID does not appear among the flagged outliers,
# then pass the result through view() to inspect it.
df_wo_outliers <- anti_join(df, df_outliers, by = "ID") %>%
  view()

我有很多数据,所以如果我能弄清楚如何用 R 来做,我不想手动做。

有没有办法按 Treatment 和 conc 分组计算异常值,并且当 conc 等于“NK”时,对 Control 为“1”和“2”的行分别单独计算?

在excel中手动计算异常值:

Excel_1 Excel_2

标签: r excel outliers iqr

解决方案


** 更新 **

如果使用你的数据(data),我们在所有组中总共只发现 9 个异常值。我不确定你是如何得出 40 个的。但这种 group_by 方法基本上是有效的。

library(dplyr)
library(rstatix)

# Convert to a tibble so the result prints compactly.
# NOTE(review): `data` is presumably the question's original 240-row data
# frame (named `df` there) - confirm before running.
data <- as_tibble(data)

# Flag outliers in `relabs` within each Treatment/conc group.
# NOTE(review): Control is not part of the grouping, so "NK" rows with
# Control 1 and 2 are evaluated together.
df_outliers <- identify_outliers(group_by(data, Treatment, conc), relabs)

# Show the data with every flagged ID removed.
anti_join(data, df_outliers, by = "ID")
#> # A tibble: 231 x 5
#>       ID Treatment conc   relabs Control
#>    <dbl> <chr>     <chr>   <dbl>   <dbl>
#>  1     1 A         100 µM  0.916       0
#>  2     2 A         100 µM  0.802       0
#>  3     3 A         100 µM  0.719       0
#>  4     4 A         100 µM  0.723       0
#>  5     5 A         100 µM  0.626       0
#>  6     6 A         100 µM  0.624       0
#>  7     7 A         100 µM  0.686       0
#>  8     8 A         100 µM  0.801       0
#>  9     9 A         10 µM   1.12        0
#> 10    10 A         10 µM   0.896       0
#> # … with 221 more rows

reprex 包(v0.3.0)于 2021-09-26 创建

旧答案

rstatix::identify_outliers 与 dplyr::group_by 配合得很好,但要演示这一点,数据中首先得有异常值。下面我修改了数据,以展示 identify_outliers 是如何工作的。

library(dplyr)
library(rstatix)

# Fix the RNG state so the resampling and injected values are reproducible.
set.seed(1)

# Setup: enlarge the data by sampling 10000 rows with replacement, then
# overwrite `relabs` in the first row of every Treatment/conc group with
# a random draw scaled by 100.
df2 <- df %>%
  slice_sample(n = 10000, replace = TRUE) %>%
  group_by(Treatment, conc) %>%
  mutate(row_id = row_number()) %>%
  mutate(relabs = ifelse(row_id == 1, rnorm(1) * 100, relabs))

# df2 still carries its Treatment/conc grouping from the setup above, so
# identify_outliers() is applied per group without another group_by().
df_outliers <- identify_outliers(df2, relabs)

# Drop the flagged rows. Sampling with replacement repeats IDs, so the
# per-group row_id is used as a second join key.
anti_join(df2, df_outliers, by = c("ID", "row_id"))

#> # A tibble: 9,990 x 6
#> # Groups:   Treatment, conc [10]
#>       ID Treatment conc  relabs Control row_id
#>    <dbl> <chr>     <chr>  <dbl>   <dbl>  <int>
#>  1     1 A         NK      0.91       1      2
#>  2     2 A         NK      0.83       2      3
#>  3    19 B         NK      0.98       1      2
#>  4     1 A         NK      0.91       1      4
#>  5    21 B         10X     0.66       0      2
#>  6    10 A         100     0.12       0      2
#>  7    14 B         NK      0.93       2      3
#>  8    10 A         100     0.12       0      3
#>  9     7 A         NK      0.73       1      5
#> 10    15 B         10X     0.62       0      3
#> # … with 9,980 more rows

reprex 包(v0.3.0)于 2021-09-26 创建


推荐阅读