首页 > 解决方案 > CS50 IDE Program Returning Blinking Cursor for PSET6 DNA

问题描述

I'm currently trying CS50 pset6. In my code, I'm trying to read a text and compute the longest run of a certain DNA or STR strand in the text so I can find the matching DNA on the CSV file. Can anyone please tell me why when I run the program on CS50 IDE, it just returns a blinking cursor? I think that there's a problem with my compute function, but I'm not certain why. Any advice would be greatly appreciated.

from sys import argv, exit
import csv

def getstring(argv):
    with open(argv[2]) as f:
        str = f.read()
        return str

def compute(tmp):
    aatg = gata = tatc = gaaa = tctg = agatc = tctag = ttttttct = 0
    #Check if DNA Sequence is AATG,GATA,TATC,GAAA,TCTG
    i = 0
    j = 5
    length = len(tmp)
    while (j <= (length - 5)):
        AAT = GAT = TAT = GAA = TCT = 0
        if tmp[i:j] == "AATG":
            AAT += 1
            i = i + 5
            j = i + 5 
        elif tmp[i:j] != "AATG":
            i = i - 5
            j = i + 5
            if AAT > aatg:
                aatg = AAT
            if tmp[i:j] == "GATA":
                GAT += 1
                i = i + 5
                j = i + 5
            if tmp[i:j] != "GATA":
                i = i - 5
                j = i + 5
                if GAT > gata:
                    gata = GAT
                if tmp[i:j] == "TATC":
                    TAT += 1
                    i = i + 5
                    j = i + 5
                if tmp[i:j] != "TATC":
                    i = i - 5
                    j = i + 5
                    if TAT > tatc:
                        tatc = TAT
                    if tmp[i:j] == "GAAA":
                        GAA += 1
                        i = i + 5
                        j = i + 5
                    if tmp[i:j] != "GAAA":
                        i = i - 5
                        j = i + 5
                        if GAA > gaaa:
                            gaaa = GAA
                        if tmp[i:j] == "TCTG":
                            TCT += 1
                            i = i + 5
                            j = i + 5
                        if tmp[i:j] != "TCTG":
                            i = i - 4
                            j = i + 5
                            if TCT > tctg:
                                tctg = TCT

    #Check if DNA Sequence is AGATC or TCTAG
    b = 6
    while (b <= (length - 6)):
        AGA = TCTA = 0
        if tmp[i:b] == "AGATC":
            AGA += 1
            i = i + 6
            b = i + 6
        elif tmp[i:b] != "AGATC":
            i = i - 6
            b = i + 6
            if AGA > agatc:
                agatc = AGA
            if tmp[i:b] == "TCTAG":
                TCTA += 1
                i = i + 6
                b = i + 6
            if tmp[i:b] != "TCTAG":
                i = i - 5
                b = i + 6
                if TCTA > tctag:
                    tctag = TCTA

    #Check if DNA Sequence is TTTTTTCT
    d = 9
    while (d <= (length - 9)):
        TTT = 0
        if tmp[i:d] == "TTTTTTCT":
            TTT += 1
            i = i + 9
            d = i + 9
        elif tmp[i:d] != "TTTTTTCT":
            i = i - 8
            d = i + 9
            if TTT > ttttttct:
                ttttttct = TTT
    return aatg, gata, tatc, gaaa, tctg, agatc, tctag, ttttttct

def main():
    if len(argv) != 3:
        print("Usage: python dna.py data.csv sequence.txt")
        exit(1)
    tmp = getstring(argv)
    AATG, GATA, TATC, GAAA, TCTG, AGATC, TCTAG, TTTTTTCT = compute(tmp)
    print(f"{AATG}, {GATA}, {TATC}") #willdeletelater
    #Store STRCOUNT in dictionary
    STRCOUNT = {
        'AGATC'  : AGATC,
        'TTTTTTCT' : TTTTTTCT,
        'AATG' : AATG,
        'TCTAG' : TCTAG,
        'GATA' : GATA,
        'TATC' : TATC,
        'GAAA' : GAAA,
        'TCTG' : TCTG
    }
    #Only large csv has all the STR dna sequence, small csv only has AGATC,AATG,TATC
    with open(argv[1], "r") as CSVfile:
        read = csv.reader(CSVfile)
        c=0
        field={}
        for row in read:
            field[c] = row
            c += 1
        ncolumns = len (field[0])
        CSVfile.seek(0)
        reader = csv.DictReader(CSVfile)
        for row in reader:
            if ncolumns == 4:
                if row['AGATC'] == STRCOUNT['AGATC'] and row['AATG'] == STRCOUNT['AATG'] and row['TATC'] == STRCOUNT['TATC']:
                    print(f"{row['name']}")
                    exit (0)
            elif ncolumns != 4:
                if row['AGATC'] == STRCOUNT['AGATC'] and row['TTTTTTCT'] == STRCOUNT['TTTTTTCT'] and row['AATG'] == STRCOUNT['AATG'] and row['TCTAG'] == STRCOUNT['TCTAG'] and row['GATA'] == STRCOUNT['GATA'] and row['TATC'] == STRCOUNT['TATC'] and row['GAAA'] == STRCOUNT['GAAA'] and row['TCTG'] == STRCOUNT['TCTG']:
                    print(f"{row['name']}")
                    exit (0)
        print("No match")
        exit (1)

main ()

Here is the DNA sequence for tmp:

AAGGTAAGTTTAGAATATAAAAGGTGAGTTAAATAGAATAGGTTAAAATTAAAGGAGATCAGATCAGATCAGATCTATCTATCTATCTATCTATCAGAAAAGAGTAAATAGTTAAAGAGTAAGATATTGAATTAATGGAAAATATTGTTGGGGAAAGGAGGGATAGAAGG

Here is 10 lines of the large CSV file:

name,AGATC,TTTTTTCT,AATG,TCTAG,GATA,TATC,GAAA,TCTG
Albus,15,49,38,5,14,44,14,12
Cedric,31,21,41,28,30,9,36,44
Draco,9,13,8,26,15,25,41,39
Fred,37,40,10,6,5,10,28,8
Ginny,37,47,10,23,5,48,28,23
Hagrid,25,38,45,49,39,18,42,30
Harry,46,49,48,29,15,5,28,40
Hermione,43,31,18,25,26,47,31,36
James,46,41,38,29,15,5,48,22

Here is the link to PSET6 DNA Specifications: https://cs50.harvard.edu/x/2020/psets/6/dna/#:~:text=python%20dna.py%20databases/small.csv%20sequences/1.txt

标签: pythoncs50dna-sequence

解决方案


推荐阅读