首页 > 解决方案 > 如何在 Python 3 中查找项目的位置并显示

问题描述

我有一个程序,想显示每个字母出现的位置,这些字母是从另一个文件中读取的。我从中读取信息的文件如下:

>sp|A1AA21|PEPT_ECOK1 Peptidase T OS=Escherichia coli O1:K1 / APEC OX=405955 GN=pepT PE=3 SV=1
MDKLLERFLNYVSLDTQSKAGVRQVPSTEGQWKLLHLLKEQLEEMGLINVTLSEKGTLMA
TLPANVPGDIPAIGFISHVDTSPDCSGKNVNPQIVENYRGGDIALGIGDEVLSPVMFPVL
HQLLGQTLITTDGKTLLGADDKAGIAEIMTALAVLQQKNIPHGDIRVAFTPDEEVGKGAK
HFDVDAFDARWAYTVDGGGVGELEFENFNAASVNIKIVGNNVHPGTAKGVMVNALSLAAR
IHAEVPADESPEMTEGYEGFYHLASMKGTVERADMHYIIRDFDRKQFEARKRKMMEIAKK
VGKGLHPDCYIELVIEDSYYNMREKVVEHPHILDIAQQAMRDCDIEPELKPIRGGTDGAQ
LSFMGLPCPNLFTGGYNYHGKHEFVTLEGMEKAVQVIVRIAELTAQRK

我想按以下格式显示位置:

A ------ 19,59,64...
B ------- -1
M --------  0,44,58...

这是代码:

# Read all lines of the input file. The context manager closes the handle
# even if reading fails (a bare open() left it open for the whole run).
with open('e.coli.fasta.txt', 'r') as f:
    sequencia = f.readlines()

# Keep only the text of interest: lines beginning with '>' are skipped.
amino = []
for linha in sequencia:
    if not linha.startswith('>'):
        amino.append(linha)

# Join the kept lines into a single string.
# NOTE(review): readlines() keeps each line terminator, so tfasta still
# contains the '\n' characters and any index computed on it counts them.
tfasta = "".join(amino)

# One-letter code -> name (in Portuguese) for the letters 'A'..'Y'.
aminoacidos = {
    'A': 'Alanina',
    'B': 'Ácido aspártico ou Asparagina',
    'C': 'Cisteína',
    'D': 'Ácido aspártico',
    'E': 'Ácido glutâmico',
    'F': 'Fenilalanina',
    'G': 'Glicina',
    'H': 'Histidina',
    'I': 'Isoleucina',
    'J': 'Leucina (L) ou Isoleucina',
    'K': 'Lisina',
    'L': 'Leucina',
    'M': 'Metionina',
    'N': 'Asparagina',
    'O': 'Pirrolisina',
    'P': 'Prolina',
    'Q': 'Glutamina',
    'R': 'Arginina',
    'S': 'Serina',
    'T': 'Treonina',
    'U': 'Selenocisteína',
    'V': 'Valina',
    'W': 'Triptofano',
    'X': 'qualquer',
    'Y': 'Tirosina',
}

def ocorrencias(string):
    """Count how many times each letter 'A'..'Y' occurs in ``string``.

    Args:
        string: The text to scan.

    Returns:
        A dict with one entry per letter 'A'..'Y' (25 keys), mapping the
        letter to its number of occurrences in ``string`` (0 when absent).
    """
    chaves = 'ABCDEFGHIJKLMNOPQRSTUVWXY'
    # Count in the argument itself, not in the module-level ``tfasta``, so
    # the function works for any text it is given.
    return {letra: string.count(letra) for letra in chaves}

# Per-letter occurrence counts for the joined text.
ocor = ocorrencias(tfasta)

# Collect the index of every 'A' in tfasta. All statements sit at column 0:
# an indented top-level statement is an IndentationError.
index = 0
lista = []
while index < len(tfasta):
    index = tfasta.find('A', index)
    # find() returns -1 when there is no further match. That -1 is stored as
    # well, so lista ends with -1 unless tfasta is empty or ends with 'A'.
    lista.append(index)
    if index == -1:
        break
    index += 1  # resume the search just past the match

# Write the report: one summary line per letter 'A'..'Y' (letter, name,
# count), followed by one line per (ordinal, index) pair taken from lista.
with open('PeptidadeT-aminoacidos', 'w') as relatorio:
    for codigo in range(ord('A'), ord('Y') + 1):
        letra = chr(codigo)
        resumo = '-{:4s}------{:5s}------{}\n'.format(
            letra, aminoacidos[letra], ocor[letra]
        )
        relatorio.write(resumo)
    # Each enumerate() item is a tuple, so its tuple repr is what is written.
    relatorio.writelines('{}\n'.format(par) for par in enumerate(lista))

标签: python list

解决方案


制作一个positions字典,其值是索引列表。然后遍历文本并将索引附加到适当的列表中。

# Map every character of tfasta to the list of indices where it occurs.
positions = {}
for posicao, letra in enumerate(tfasta):
    # setdefault() creates the empty list the first time a character is seen.
    positions.setdefault(letra, []).append(posicao)
print(positions)

这是完整的代码:

# Load every line of the input file; the handle is closed on leaving the block.
with open('e.coli.fasta.txt', 'r') as f:
    sequencia = f.readlines()

# Build a list holding only the text of interest: any line that does not
# begin with '>'.
amino = [linha for linha in sequencia if not linha.startswith('>')]

# Concatenate the kept lines (line terminators included) into one string.
tfasta = "".join(amino)

# Lookup table: one-letter code ('A'..'Y') -> name in Portuguese.
aminoacidos = {
    'A': 'Alanina',
    'B': 'Ácido aspártico ou Asparagina',
    'C': 'Cisteína',
    'D': 'Ácido aspártico',
    'E': 'Ácido glutâmico',
    'F': 'Fenilalanina',
    'G': 'Glicina',
    'H': 'Histidina',
    'I': 'Isoleucina',
    'J': 'Leucina (L) ou Isoleucina',
    'K': 'Lisina',
    'L': 'Leucina',
    'M': 'Metionina',
    'N': 'Asparagina',
    'O': 'Pirrolisina',
    'P': 'Prolina',
    'Q': 'Glutamina',
    'R': 'Arginina',
    'S': 'Serina',
    'T': 'Treonina',
    'U': 'Selenocisteína',
    'V': 'Valina',
    'W': 'Triptofano',
    'X': 'qualquer',
    'Y': 'Tirosina',
}

def ocorrencias(string):
    """Return the number of occurrences of each letter 'A'..'Y' in ``string``.

    Args:
        string: The text to scan.

    Returns:
        A dict with exactly 25 keys ('A'..'Y'); each value is how many
        times that letter appears in ``string`` (0 when it does not).
    """
    result = {}
    chaves = 'ABCDEFGHIJKLMNOPQRSTUVWXY'
    for letra in chaves:
        # Count in the argument, not in the module-level ``tfasta``.
        result[letra] = string.count(letra)
    # Return only after every letter has been counted; returning from inside
    # the loop would leave 'A' as the only key.
    return result

# Occurrence counts returned by ocorrencias() for the joined text.
ocor = ocorrencias(tfasta)

def positions(string):
    """Return, for each letter 'A'..'Y', the indices at which it occurs.

    Args:
        string: The sequence to scan; items that are not one of the
            letters 'A'..'Y' are ignored.

    Returns:
        A dict with one key per letter 'A'..'Y', each mapped to the list of
        0-based indices (in increasing order) where that letter appears in
        ``string``. Letters that never appear map to an empty list.
    """
    alfabeto = 'ABCDEFGHIJKLMNOPQRSTUVWXY'
    encontrados = {}
    for letra in alfabeto:
        encontrados[letra] = []  # a separate list per letter
    for posicao, simbolo in enumerate(string):
        indices = encontrados.get(simbolo)
        if indices is None:
            continue  # not one of the tracked letters
        indices.append(posicao)
    return encontrados

# Indices of every letter 'A'..'Y' in the joined text, keyed by letter.
lista = positions(tfasta)

# Write the report: one summary line per letter (letter, name, count),
# then one line per letter listing the positions where it occurs.
with open('PeptidadeT-aminoacidos', 'w') as p:
    for i in range(65, 90):  # code points of 'A'..'Y'
        a = chr(i)
        p.write('-{:4s}------{:5s}------{}\n'.format(a, aminoacidos[a], ocor[a]))
    # lista is a dict, so enumerating it directly would only yield
    # (ordinal, letter) pairs; iterate over items() to reach the positions.
    for letra, indices in lista.items():
        # A letter that never occurs is reported as -1.
        texto = ','.join(str(n) for n in indices) if indices else '-1'
        p.write('{} ------ {}\n'.format(letra, texto))

推荐阅读