首页 > 解决方案 > Applying t-test to matrix without using a loop

问题描述

I am trying to apply a t-test to a matrix of TPM values for RNAseq data and I am getting an error with my code. I am new to R so I am still learning writing scripts. My file has 7 columns - the first is a list of genes, 2:4 are replicates in group1, and 5:7 are replicates in group2. The error I am getting is:

t.test.default(x, y) : not enough 'x' observations.

# Read file
T <- read.table("./rnaseq_tpm_matrix_S_24v0.csv", sep=",", header=TRUE)
# Assign groups of data
x <- T[i,grep('^Sample_S_0',names(T))]
y <- T[i,grep('^Sample_S_24',names(T))]
# Calculate average of each group for each gene
group1_avg <- as.matrix(sum(x)/length(x))
group2_avg <- as.matrix(sum(y)/length(y))
# Calculate log2 (TPM +1)
x <- log2(as.matrix(group1_avg)+1)
y <- log2(as.matrix(group2_avg)+1)
dat <- data.frame(values=c(x,y),vars = rep(c("Sample_SKOV3_0","Sample_SKOV3_24"), times = c(length(x),length(y))))
# Apply t-test
results <- apply(dat, 1, function(dat) {
    t.test(x,y)$p.value})
#Bind p-values into matrix
cbind(dat, pvals = results)
Error in t.test.default(x = dat[1:x], y = dat[(x + 1):(x + y)]) : 
  not enough 'x' observations
Error during wrapup: arguments imply differing number of rows: 2, 5
Error: no more error handlers available (recursive errors?); invoking 'abort' restart
dput(head(T, 20))
structure(list(Sample_H_repA = c("5S_rRNA", "5_8S_rRNA", 
"6M1-18", "7M1-2", "7SK", "A1BG", "A1BG-AS1", "A1CF", "A2M", 
"A2M-AS1", "A2ML1", "A2MP1", "A3GALT2", "A4GALT", "A4GNT", "AA06", 
"AAAS", "AACS", "AACSP1", "AADAC"), Sample_S_0_rep1 = c(0, 
0, 0, 0, 2478.55, 3.91, 0.69, 0, 0.63, 0, 1.94, 0, 0, 3.37, 0, 
0, 71.22, 20.3, 0, 1.6), Sample_S_0_rep2 = c(0, 
0, 0, 0, 789.63, 2.98, 0.15, 0, 0, 0, 0, 0, 0, 8.12, 0, 0, 109.03, 
8.84, 0.11, 0), Sample_S_0_rep3 = c(0, 0, 0, 0, 
802.76, 2.99, 0.63, 0, 0, 0.02, 0, 0, 0, 5.59, 0, 0, 108.95, 
10.69, 0.11, 0), Sample_S_24_rep1 = c(0, 0, 0, 
0, 993.88, 3.71, 0.35, 0, 0, 0, 0, 0, 0, 2.9, 0, 0, 106.5, 17.53, 
0.09, 0), Sample_S_24_rep2 = c(0, 0, 0, 0, 896.23, 
3.48, 0.09, 0, 0, 0, 0, 0, 0, 6.95, 0, 0, 110.04, 15.05, 0.12, 
0), Sample_S_24_rep3 = c(0, 0, 0, 0, 947.83, 4.02, 
0.09, 0, 0, 0, 0, 0, 0, 7.04, 0, 0, 110.49, 12.71, 0.06, 0)), row.names = c(NA, 
20L), class = "data.frame")

Sample Data:
Sample Data

标签: rmatrixt-test

解决方案


推荐阅读