首页 > 解决方案 > 我的循环没有进行应有的连接

问题描述

我正在尝试创建一个函数,该函数能够将一个 8 字符的字符串(例如 AAARLAA)中的每个字母转换为 20 位代码(10000000 ...),然后将结果连接起来,以便从这个 8 字符的字符串中获得一个包含 160 个整数的向量(其中每 20 个数字对应一个字母)。

这是我的脚本

  # Question's original attempt (kept as posted). Intended to turn every letter
  # of `octamero` into a 20-element 0/1 code and concatenate the codes.
  # As written it only processes the first character of a single string; see
  # the notes on `resultado_palabras` and `n` below.
  octamer_encoding <- function(octamero){

  # Seeds the result with length(octamero) zeros. Everything appended later
  # with c() comes after these zeros, so they stay at the front of the output.
  resultado_palabras <- vector("integer", length(octamero))
  
  # One 20-element one-hot code per letter.
  # NOTE: the locals `T` and `F` mask R's shorthands for TRUE and FALSE inside
  # this function.
  A = c(1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)
  R = c(0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)
  N = c(0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)
  D = c(0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)
  C = c(0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)
  E = c(0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0)
  Q = c(0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0)
  G = c(0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0)
  H = c(0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0)
  I = c(0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0)
  L = c(0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0)
  K = c(0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0)
  M = c(0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0)
  F = c(0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0)
  P = c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0)
  S = c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0)
  T = c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0)
  W = c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0)
  Y = c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0)
  V = c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1)
  
  # Stack the codes into a 20 x 20 matrix; row k is the code for letras[k].
  numeros <- rbind(A,R,N,D,C,E,Q,G,H,I,L,K,M,F,P,S,T,W,Y,V)
  letras <- c('A','R','N','D','C','E','Q','G','H','I','L','K','M','F','P','S','T','W','Y','V')
  
  # length() counts vector elements, not characters: for a single string n is
  # 1, so the outer loop below visits only the first character.
  n = length(octamero)
  h = length(letras)
  
    # For each position j, scan the alphabet and append the code of the
    # matching letter to the result.
    for (j in 1:n){
      for (k in 1:h){
        if ((substring(octamero, first = j, last = j)) ==  (letras[k])){
          resultado_palabras <- c(resultado_palabras, numeros[k,])
        }
      }
    }
  
  return(resultado_palabras)
}

由于某种原因,我得到的结果只是一个包含 20 个数字的向量,对应于字符串的第一个字母,这意味着循环没有把各个字母的结果连接起来,但我不明白我哪里做错了。

标签: r

解决方案


代码审查

您需要对代码做几处修改,具体位置已在下面的代码中用 EDIT 注释标出:

# One-hot encode the letters of a single string over the 20-letter alphabet
# `letras`.
#
# Args:
#   octamero: a single, non-NA string (e.g. "AAARLAA").
#
# Returns:
#   An unnamed numeric vector of 0/1 values made of one 20-element block per
#   recognised letter, in input order. Characters that are not in `letras` are
#   skipped (they contribute no block); an empty string gives numeric(0).
octamer_encoding <- function(octamero) {
  stopifnot(length(octamero) == 1L, !is.na(octamero))

  letras <- c(
    "A", "R", "N", "D", "C", "E", "Q", "G", "H", "I",
    "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V"
  )
  h <- length(letras)

  # Row k of the identity matrix is the one-hot code of letras[k]. Building it
  # with diag() also avoids local variables named `T` and `F`, which would mask
  # R's shorthands for TRUE and FALSE.
  numeros <- diag(1, h)

  # EDIT 2: nchar() counts characters; length() of a single string is 1.
  n <- nchar(octamero)

  # EDIT 1: do not seed the result with zeros. Collect one code per position
  # and concatenate once at the end instead of growing a vector with c().
  codigos <- vector("list", n)

  # seq_len() keeps the loop from running when n is 0 (1:0 would count down).
  for (j in seq_len(n)) {
    k <- match(substring(octamero, first = j, last = j), letras)
    if (!is.na(k)) {
      codigos[[j]] <- numeros[k, ]
    }
  }

  # unlist() drops the NULL slots left by skipped characters; as.numeric()
  # turns the NULL produced by an all-empty list into numeric(0).
  as.numeric(unlist(codigos, use.names = FALSE))
}

更紧凑

另外,我冒昧地简化了您的代码以帮助您:

# Vectorised one-hot encoder over the 20-letter alphabet `letras`.
#
# Args:
#   octamero: a character string (e.g. "AAARLAA"). Only the first element is
#     used if a longer character vector is passed.
#
# Returns:
#   An unnamed numeric vector of 0/1 values made of one 20-element block per
#   recognised letter, in input order. Characters that are not in `letras` are
#   skipped, matching the loop-based octamer_encoding(); an empty string gives
#   numeric(0).
octamer_encoding2 <- function(octamero) {
  letras <- c(
    "A", "R", "N", "D", "C", "E", "Q", "G", "H", "I",
    "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V"
  )
  # Column k of the identity matrix is the one-hot code of letras[k].
  numeros <- diag(1, length(letras))

  # Look letters up by position with match() rather than by column name, so a
  # character outside the alphabet is dropped instead of raising
  # "subscript out of bounds".
  caracteres <- strsplit(octamero, "")[[1]]
  posiciones <- match(caracteres, letras)
  posiciones <- posiciones[!is.na(posiciones)]

  # drop = FALSE keeps a matrix even for a single letter; as.vector() then
  # concatenates the selected columns in order.
  as.vector(numeros[, posiciones, drop = FALSE])
}

# Example: encode the 7-letter string "AAARLAA". The `#>` lines are the
# recorded console output: 140 values, i.e. one block of 20 per letter.
octamer_encoding("AAARLAA")
#>   [1] 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
#>  [46] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
#>  [91] 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
#> [136] 0 0 0 0 0

它的功能完全相同,但更紧凑。

而且更快:

# Time both implementations on the same input. The `#>` lines are the recorded
# output: timings in microseconds over 100 evaluations of each expression.
microbenchmark::microbenchmark(octamer_encoding("AAARLAA"),
                               octamer_encoding2("AAARLAA"))
#> Unit: microseconds
#>                          expr   min     lq    mean median     uq    max neval
#>   octamer_encoding("AAARLAA") 604.3 632.95 735.284  673.2 760.70 1278.1   100
#>  octamer_encoding2("AAARLAA")  17.5  19.75  31.425   24.3  36.95  132.8   100

不同的编码

如果您对 letras 的另一种(更紧凑的)编码感兴趣,可以只用 5 位二进制数字来区分 20 个不同的字母。

您可以定义numeros如下:

# Compact alternative: give each letter a distinct fixed-width binary code,
# namely the binary representation of its 1-based position in `letras`.
# `letras` is defined here because the copies inside the encoder functions are
# local to them and not visible at top level. Uses base R only.
letras <- c(
  "A", "R", "N", "D", "C", "E", "Q", "G", "H", "I",
  "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V"
)
l <- length(letras)
# Codes run from 1 to l, so the width must be able to represent l itself
# (5 bits for 20 letters).
n_bits <- ceiling(log2(l + 1))
# intToBits() lists bits least-significant first; keep the lowest `n_bits` and
# reverse them so that row 1 holds the most significant bit.
numeros <- vapply(
  seq_len(l),
  function(i) rev(as.integer(intToBits(i))[seq_len(n_bits)]),
  integer(n_bits)
)
colnames(numeros) <- letras
numeros
#>      A R N D C E Q G H I L K M F P S T W Y V
#> [1,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1
#> [2,] 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 0 0 0 0 0
#> [3,] 0 0 0 1 1 1 1 0 0 0 0 1 1 1 1 0 0 0 0 1
#> [4,] 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0
#> [5,] 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0

推荐阅读