python-3.x - 二维数组比较和输出文件问题 | Python
问题描述
我正在尝试比较两个大小不同的二维数组。Array1(RNA 序列)[63:3]:
AAA
AAC
AGC
UAC
Array2:RNA_coordinate_source_file [128:11]
ATOM 142 H1' A 5 -1.227 8.932 12.229 1.00 0.00 H
ATOM 142 H1' A 5 -1.227 8.932 12.229 1.00 0.00 H
ATOM 142 H1' G 5 -1.227 8.932 12.229 1.00 0.00 H
ATOM 143 N1 G 5 -0.447 7.106 12.630 1.00 0.00 N
ATOM 143 N1 G 5 -0.447 7.106 12.630 1.00 0.00 N
ATOM 143 N1 C 5 -0.447 7.106 12.630 1.00 0.00 N
ATOM 142 H1' C 5 -1.227 8.932 12.229 1.00 0.00 H
ATOM 143 N1 G 5 -0.447 7.106 12.630 1.00 0.00 N
ATOM 143 N1 G 5 -0.447 7.106 12.630 1.00 0.00 N
ATOM 143 N1 C 5 -0.447 7.106 12.630 1.00 0.00 N
对于 array1 中的每一个 RNA 三联体,我想写出一个以该三联体命名的新文件(例如 AAG.pdb),文件中包含 array2 里第 4 列(残基名)与该三联体各碱基相对应的所有坐标行。
输出文件应从 array2 中提取相应的行:以 AAG 为例,先写入“A”的全部坐标,再写入一次“A”的全部坐标,最后写入“G”的全部坐标;每个三联体都照此生成各自单独命名的文件。
我不知道如何比较两个大小不同的数组,下面的代码也无法正常工作,希望能得到帮助,谢谢。
import re

# Script: read a whitespace-separated PDB-like coordinate file, build every
# three-letter combination of A/C/G/U, and for each triplet write the
# coordinate rows whose residue-name column matches one of its letters.

# Fields are separated by runs of whitespace; compiled once because it is
# applied to every input line.
FIELD_SEP = re.compile(r"[ \t\r\n]+")

# Index of the residue-name field in a split row (the 4th column).
RESIDUE_NAME_COL = 3

with open("drive/My Drive/RES/dimeric_force_field/Test/python_read/cropped.pdb", "r") as file:
    # Strip the line before splitting instead of discarding the last token:
    # dropping the last token loses a real field when the final line has no
    # trailing newline.
    arr = [FIELD_SEP.split(line.strip()) for line in file]

myArray = ["A", "C", "G", "U"]
triplet = [a1 + a2 + a3 for a1 in myArray for a2 in myArray for a3 in myArray]

print("triplets=", triplet)
print("triplet array length=", len(triplet))
print("arr=", arr)
print("arr length=", len(arr))

with open("drive/My Drive/Colab Notebooks/pdb_out.pdb", "w") as fw:
    # Start at the first triplet. Starting the index at 64 (== len(triplet))
    # meant the loop body never ran and only "TER" was written.
    for i, codon in enumerate(triplet):
        for j, row in enumerate(arr):
            # Blank or short records have no residue-name column; skip them
            # rather than failing with IndexError.
            if len(row) <= RESIDUE_NAME_COL:
                continue
            residue_name = row[RESIDUE_NAME_COL]
            record = "\t".join(row)
            # A row is emitted once per triplet position it matches, so a
            # row with residue "A" is written three times for "AAA".
            for base in codon:
                if base == residue_name:
                    print(record)
                    fw.write(str(i) + " " + str(j) + record + "\n")
    fw.write("TER")

# Echo the generated file so the result can be inspected.
with open("drive/My Drive/Colab Notebooks/pdb_out.pdb", "r") as fr:
    print(fr.read())
解决方案
以下代码可以正常工作:
# Script fragment: for every triplet from index i onward, write a file named
# "<triplet>.pdb" containing the coordinate rows of its first, second and
# third nucleotide, in that order.
# NOTE(review): i, triplet, NUC_A, NUC_C, NUC_G and NUC_U are defined outside
# this excerpt; each NUC_* is presumably a list of rows (lists of fields) for
# one nucleotide -- confirm against the code that builds them.


def _format_rows(rows):
    """Return rows as tab-separated fields, one row per line, no trailing newline."""
    return "\n".join("\t".join(str(field) for field in row) for row in rows)


# Rows for each nucleotide letter. Any letter that is not A/C/G falls back to
# the U rows, matching the original if/elif/else chain.
NUC_ROWS = {"A": NUC_A, "C": NUC_C, "G": NUC_G}

while i < len(triplet):
    # Take the three letters from the current triplet on every iteration so
    # each output file reflects its own triplet.
    s1, s2, s3 = triplet[i]

    # n1/n2 record the letters used for the first two positions. They are not
    # read in this excerpt but are kept in case later code uses them; the U
    # case now stores "U" (previously "G" for n1, and n2 was "" or left unset).
    n1 = s1 if s1 in NUC_ROWS else "U"
    n2 = s2 if s2 in NUC_ROWS else "U"

    sections = [_format_rows(NUC_ROWS.get(base, NUC_U)) for base in (s1, s2, s3)]

    with open("drive/My Drive/Colab Notebooks/test/" + str(triplet[i]) + ".pdb", "w") as fw:
        # Sections are separated by a single newline with none before the
        # first, so the file does not start with an empty line.
        fw.write("\n".join(sections))
    i += 1

# NOTE(review): this echoes pdb_out.pdb, not the per-triplet files written
# above -- confirm that is the file meant to be inspected.
with open("drive/My Drive/Colab Notebooks/pdb_out.pdb", "r") as fr:
    print(fr.read())
推荐阅读
- r - 如何使用现有 data.table 的列名构造一个空的 data.table?
- profiling - perf 报告失败并出现错误“数据字段大小为 0,这是意外的。'perf record' 命令是否正确终止?”
- c# - 为什么 runat="server" 会导致失败?
- php - 显示随机生成的代码的问题
- php - 无法为 laravel 应用程序(mysql)中的数据库文本字段提供默认值
- php - 使用 Flickr 照片搜索 API 时出错:API 密钥无效(密钥格式无效)
- reactjs - Material-UI Divider 在屏幕右侧添加了一个滚动条,当我从组件中删除它时,没有 ScrollBar
- java - 如何关闭收集用户输入的 ArrayList 的 while 循环
- python-3.x - 寻找一种更 Pythonic 的方式将列表列表转换为字符串
- api - Block.io 代币提现