首页 > 解决方案 > For循环:如何为特定迭代应用不同的命令

问题描述

您好,我有下面这个 data frame,想用 for 循环对它进行分析。但在分析代码中,我只想去掉其中一个基因(Hbvrt)的一个或多个样本,使这些样本不参与整个分析。

## Build a toy data set: four genes, each measured in samples s1-s4,
## with one random read count per gene/sample pair.
sample_ID <- rep(c("s1", "s2", "s3", "s4"), times = 4)
gene_ID <- rep(c("TFT", "Hbvrt", "Myx4", "Rai56n"), each = 4)
readz <- runif(n = 16, min = 5000, max = 7500)

df <- data.frame(sample_ID = sample_ID, gene_ID = gene_ID, readz = readz)

## start the loop
## For each gene: take its rows from df, draw one Nanost value per row,
## derive NEW as Nanost expressed as a percentage of readz, and append the
## per-gene data frame to the list `res`.
res <- list()
for (g in unique(df$gene_ID)) {

  df_g <- df[df$gene_ID == g, ]

  ## Draw as many values as the subset has rows (rather than a fixed 4) so
  ## the assignment still works once samples are removed from a gene.
  df_g$Nanost <- runif(nrow(df_g), 5000, 7500)

  df_g$NEW <- df_g$Nanost / df_g$readz * 100

  ## AND long code here ....

  ## function for graph ## Graphs Not shown here
  ## NOTE: x and y are not used; the plot always reads the current df_g, and
  ## the function is only defined here, never called in the code shown.
  ## ggscatter() is presumably ggpubr::ggscatter -- the package is not loaded
  ## in the code shown; confirm before running.
  scatter_fun <- function(x, y) {

    ggscatter(df_g, x = "readz", y = "Nanost",
              add = "reg.line", conf.int = TRUE,
              cor.coef = TRUE, cor.method = "pearson",
              xlab = "readz", ylab = "Nanost")

  }

  res[[length(res) + 1]] <- df_g
}

print(res)

[[1]]
  sample_ID gene_ID    readz   Nanost       NEW
1        s1     TFT 6577.112 6582.497 100.08186
2        s2     TFT 6914.966 6192.676  89.55468
3        s3     TFT 7494.457 6508.501  86.84420
4        s4     TFT 7069.737 5966.418  84.39378

[[2]]
  sample_ID gene_ID    readz   Nanost       NEW
5        s1   Hbvrt 6346.545 7499.966 118.17399
6        s2   Hbvrt 7368.858 6860.801  93.10536
7        s3   Hbvrt 5671.581 5604.065  98.80957
8        s4   Hbvrt 6067.496 7420.354 122.29680 ## REMOVE THIS in the result and graph

[[3]]
   sample_ID gene_ID    readz   Nanost       NEW
9         s1    Myx4 5270.035 7086.622 134.47011
10        s2    Myx4 7338.199 5670.227  77.27002
11        s3    Myx4 5596.834 5595.212  99.97101
12        s4    Myx4 5477.589 7472.254 136.41502

[[4]]
   sample_ID gene_ID    readz   Nanost      NEW
13        s1  Rai56n 6526.715 6475.832 99.22040
14        s2  Rai56n 5512.179 5137.163 93.19660
15        s3  Rai56n 6109.446 5221.244 85.46182
16        s4  Rai56n 5836.242 5602.662 95.99776

我试过在 for 循环中加入下面这样的 if else 语句,但没有用:

# Attempted fix: inside the per-gene loop, swap in a filtered data frame
# when the current gene is Hbvrt.
for ( g in unique(df$gene_ID)){
  
  df_g <- df[df$gene_ID == g, ]

### here to exclude a sample or more..
# NOTE(review): this filters the full `df`, so df_ex holds the non-s4 rows of
# every gene, not only those of the current gene `g`.
df_ex <- df[df$sample_ID != 's4' ,]

# NOTE(review): Hbvrt is written without quotes, so R evaluates it as a
# variable name rather than the string 'Hbvrt'; no such variable is defined
# in the code shown.
if (g == Hbvrt){ 
df_g <- df_ex
} else if (g != Hbvrt){
# Self-assignment: leaves df_g as it was.
df_g <- df_g
}

.
.
.
.
. ## the same as before 

提前非常感谢。

标签: r

解决方案


你可以试试 tidyverse

library(tidyverse)

# Add the simulated Nanost column and the NEW percentage to every row, then
# drop sample s4 of gene Hbvrt and split the remaining rows by gene.
df %>%
  mutate(
    Nanost = runif(n(), 5000, 7500),
    NEW = Nanost / readz * 100
  ) %>%
  filter(gene_ID != "Hbvrt" | sample_ID != "s4") %>%
  split(.$gene_ID)
$Hbvrt
sample_ID gene_ID    readz   Nanost       NEW
5        s1   Hbvrt 5745.199 6183.276 107.62510
6        s2   Hbvrt 5516.671 7123.757 129.13144
7        s3   Hbvrt 5521.244 5477.057  99.19969

$Myx4
sample_ID gene_ID    readz   Nanost       NEW
8         s1    Myx4 7379.206 5452.667  73.89233
9         s2    Myx4 6384.764 5388.520  84.39653
10        s3    Myx4 6974.163 6435.838  92.28116
11        s4    Myx4 5645.421 6328.313 112.09639

$Rai56n
sample_ID gene_ID    readz   Nanost       NEW
12        s1  Rai56n 6760.502 5206.982  77.02064
13        s2  Rai56n 5221.044 5927.277 113.52667
14        s3  Rai56n 5572.512 5535.466  99.33520
15        s4  Rai56n 6438.368 5470.014  84.95964

$TFT
sample_ID gene_ID    readz   Nanost       NEW
1        s1     TFT 7456.672 7049.609  94.54095
2        s2     TFT 6497.004 6943.117 106.86643
3        s3     TFT 5028.566 6971.517 138.63828
4        s4     TFT 6947.292 6704.789  96.50939

在 base R 中可以这样做:

# Operate on the full data frame `df` (not on a per-gene subset), so that
# every gene ends up in the split result.
df$Nanost <- runif(nrow(df), 5000, 7500)
df$NEW <- df$Nanost / df$readz * 100
# TRUE for every row to keep: everything except sample s4 of gene Hbvrt.
gr <- !(df$gene_ID == "Hbvrt" & df$sample_ID == "s4")
# One data frame per gene, built from the kept rows only.
df_list <- split(df[gr, ], df$gene_ID[gr])

不需要循环。绘图可以使用lapply

# One scatter plot of NEW against readz for each per-gene data frame.
plots <- lapply(df_list, function(gene_df) {
  ggplot(gene_df, aes(x = readz, y = NEW)) + geom_point()
})

但是如果你想坚持使用循环,那么只需在循环的开头把这个基因排除掉:

# Loop over every gene except Hbvrt; the mask is built from the same vector
# it indexes, so the lengths always match.
for (g in unique(df$gene_ID[df$gene_ID != "Hbvrt"])){
 

推荐阅读