r - 我的循环没有进行应有的连接
问题描述
我正在尝试创建一个函数,该函数能够将一个 8 字符的字符串(例如 AAARLAA)中的每个字母转换为 20 位代码(10000000 ...),然后将结果连接起来,从而由这个 8 字符的字符串得到一个长度为 160 的整数向量(其中每 20 个数字对应一个字母)。
这是我的脚本
# Attempt at one-hot encoding the letters of `octamero`: every recognised
# letter appends the matching 20-element row of `numeros` to the result.
# NOTE(review): as written, a single input string only has its first
# character inspected -- see the notes on `resultado_palabras` and `n` below.
octamer_encoding <- function(octamero){
# NOTE(review): vector("integer", length(octamero)) is a zero-filled integer
# vector, not an empty one; for a single string it is `0L`, and that 0 stays
# at the front of everything appended later.
resultado_palabras <- vector("integer", length(octamero))
# One indicator vector per letter: the k-th vector has its single 1 at
# position k, in the same order as `letras` below.
A = c(1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)
R = c(0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)
N = c(0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)
D = c(0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)
C = c(0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)
E = c(0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0)
Q = c(0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0)
G = c(0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0)
H = c(0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0)
I = c(0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0)
L = c(0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0)
K = c(0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0)
M = c(0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0)
F = c(0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0)
P = c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0)
S = c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0)
T = c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0)
W = c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0)
Y = c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0)
V = c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1)
# Stack the indicator vectors so that row k of `numeros` belongs to letras[k].
numeros <- rbind(A,R,N,D,C,E,Q,G,H,I,L,K,M,F,P,S,T,W,Y,V)
letras <- c('A','R','N','D','C','E','Q','G','H','I','L','K','M','F','P','S','T','W','Y','V')
# NOTE(review): length() counts the elements of the character vector (1 for a
# single string), not its characters; nchar() gives the character count.
n = length(octamero)
h = length(letras)
for (j in 1:n){
for (k in 1:h){
# Compare the j-th character with each candidate letter; on a match, append
# that letter's row of `numeros` to the running result.
if ((substring(octamero, first = j, last = j)) == (letras[k])){
resultado_palabras <- c(resultado_palabras, numeros[k,])
}
}
}
return(resultado_palabras)
}
由于某种原因,我得到的结果只是一个包含 20 个数字的向量,对应于字符串的第一个字母,这说明循环没有把各个字母的结果连接起来,但我不明白我哪里做错了。
解决方案
代码审查
您需要进行一些更改。您应该在下面看到它们:
# One-hot encode a peptide string.
#
# Every character of `octamero` that is one of the 20 amino-acid letters in
# `letras` is replaced by a 20-element 0/1 indicator vector (a single 1 at the
# letter's position in `letras`); the indicator vectors are concatenated in
# string order.
#
# Args:
#   octamero: a single, non-NA character string, e.g. "AAARLAA".
#
# Returns:
#   An unnamed numeric vector of length 20 * (number of recognised letters);
#   numeric(0) for the empty string. Characters that are not in `letras`
#   contribute nothing to the result and raise a warning.
octamer_encoding <- function(octamero) {
  # The loop below compares scalars, so anything but one string is an error.
  stopifnot(is.character(octamero), length(octamero) == 1L, !is.na(octamero))

  letras <- c("A", "R", "N", "D", "C", "E", "Q", "G", "H", "I",
              "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V")
  h <- length(letras)
  # Row k of the identity matrix is the indicator vector of letras[k].
  numeros <- diag(1, h)

  # EDIT 1: start from an EMPTY vector; a zero-filled vector would leave a
  # stray leading 0 in the result.
  resultado_palabras <- numeric(0)
  # EDIT 2: nchar() counts characters; length() would be 1 for any string.
  n <- nchar(octamero)

  # seq_len() gives an empty sequence for "", whereas 1:n would run over c(1, 0).
  for (j in seq_len(n)) {
    # Extract the j-th character once instead of once per candidate letter.
    letra <- substring(octamero, first = j, last = j)
    encontrada <- FALSE
    for (k in seq_len(h)) {
      if (letra == letras[k]) {
        resultado_palabras <- c(resultado_palabras, numeros[k, ])
        encontrada <- TRUE
        break  # letters are unique, so no later k can match
      }
    }
    if (!encontrada) {
      # Skipping shortens the output, so make the skip visible to the caller.
      warning("character '", letra, "' at position ", j,
              " is not a known letter and was skipped", call. = FALSE)
    }
  }

  resultado_palabras
}
更紧凑
另外,我冒昧地简化了您的代码以帮助您:
# Compact one-hot encoder: returns the concatenated 20-element indicator
# vectors of the characters of `octamero` (only the first element of
# `octamero` is used), as an unnamed numeric vector.
octamer_encoding2 <- function(octamero) {
  alphabet <- c("A", "R", "N", "D", "C", "E", "Q", "G", "H", "I",
                "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V")
  size <- length(alphabet)

  # Identity matrix whose columns are labelled by letter, so a character
  # vector can pick out one indicator column per residue.
  one_hot <- matrix(0, nrow = size, ncol = size,
                    dimnames = list(NULL, alphabet))
  diag(one_hot) <- 1

  residues <- strsplit(octamero, "")[[1]]
  # Flatten column by column: 20 values per residue, in string order.
  as.vector(one_hot[, residues])
}
# Example: encode a 7-letter string; the printed result below has
# 7 x 20 = 140 values.
octamer_encoding("AAARLAA")
#> [1] 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
#> [46] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
#> [91] 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
#> [136] 0 0 0 0 0
它的功能完全相同,但更紧凑。
而且更快:
# Time both implementations on the same input (needs the microbenchmark
# package to be installed).
microbenchmark::microbenchmark(octamer_encoding("AAARLAA"),
octamer_encoding2("AAARLAA"))
#> Unit: microseconds
#> expr min lq mean median uq max neval
#> octamer_encoding("AAARLAA") 604.3 632.95 735.284 673.2 760.70 1278.1 100
#> octamer_encoding2("AAARLAA") 17.5 19.75 31.425 24.3 36.95 132.8 100
不同的编码
如果您对 letras 的另一种(更紧凑的)编码感兴趣,您可以只使用 5 位数字来识别 20 个不同的字母。
您可以按如下方式定义 numeros:
# Build a binary code table for the letters as a more compact alternative to
# one-hot encoding: one column per letter, one row per bit.
# NOTE(review): `letras` must already exist in this scope. In the functions
# above it is a local variable, so it has to be defined again before this
# snippet is run on its own.
library(binaryLogic)
l <- length(letras)
# ceiling(logb(l, 2)) is the number of bits needed for l codes (5 for l = 20).
# as.binary() comes from binaryLogic -- presumably it returns one fixed-width
# bit vector per integer; confirm against the package documentation.
# simplify2array() binds those vectors into a matrix, and the unary `+`
# presumably converts logical bits to numbers so they print as 0/1.
numeros <- +simplify2array(as.binary(seq_len(l), n = ceiling(logb(l,2))))
colnames(numeros) <- letras
numeros
#> A R N D C E Q G H I L K M F P S T W Y V
#> [1,] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1
#> [2,] 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 0 0 0 0 0
#> [3,] 0 0 0 1 1 1 1 0 0 0 0 1 1 1 1 0 0 0 0 1
#> [4,] 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1 0
#> [5,] 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0
推荐阅读
- python - dataframe.iloc[:,:] 和 dataframe.iloc[:,:].values 有什么区别
- scala - 将 Set 转换为 Scala Spark 中的单个对象
- javascript - 无法在 MacOS 10.13.2 上设置 Reaction Commerce
- python - 如何为不泄漏内存的大型生成器编写消费者?
- python - 如何将模块的属性添加到具有自定义行为的类
- azure - 使用 Postman 在 Azure 上发出 500 内部服务器错误 POST 请求
- java - 无法在我的依赖项中实现 'com.firebaseui:firebase-ui-auth:2.3.0'
- python - 如何让 arff.loadarff 与 urllib.request 一起从 URL 读取 arff 文件?
- ruby-on-rails - 基于关联的 Rails
- html - 我的代码中的 CSS 对齐有什么问题