首页 > 解决方案 > Deap 运行时间给出了关键错误的错误

问题描述

C:\Python27\python.exe
C:/Users/Adekunle/PycharmProjects/d4jtest/newDp.py Traceback (most 
recent call last):   File
"C:/Users/Adekunle/PycharmProjects/d4jtest/newDp.py", line 166, in
<module>
    main_run("C:\defect4j\TinyGP")   File "C:/Users/Adekunle/PycharmProjects/d4jtest/newDp.py", line 151, in
main_run
    Fitness, RawScore, Formula = main()   File "C:/Users/Adekunle/PycharmProjects/d4jtest/newDp.py", line 130, in
main
    halloffame=hof, verbose=True)   File "C:\Python27\lib\site-packages\deap\algorithms.py", line 150, in
eaSimple
    fitnesses = toolbox.map(toolbox.evaluate, invalid_ind)   File "C:/Users/Adekunle/PycharmProjects/d4jtest/newDp.py", line 82, in
eval_func
    spectrum = FaultVersionsDict[str(numberOfversion[version])] KeyError: '244'

Process finished with exit code 1

我该如何解决这个错误?这是完整的代码:

import os
import sys
import operator
import numpy as np
import pandas as pd
import time
import pickle
import warnings
import random
import itertools

import random
from deap import algorithms
from deap import base
from deap import creator
from deap import tools
import numpy
from deap import gp
import glob

warnings.filterwarnings('ignore')


def datafile():
    files = []
    for filepath in glob.iglob(r'C:\defect4j\All single fault\*.txt'):
        files.append(filepath)
    return files

def readcsv():
    for csvfiles in datafile():
        nFaultVersion = len(datafile())
        vFaultLocation = np.zeros(nFaultVersion)
        vStatementCount = np.zeros(nFaultVersion)
        i = 0
        FaultVersionsDict = {}
        nFirstFault = (pd.read_csv(csvfiles, sep=',', nrows=0)).columns[0]
        df = pd.read_csv(csvfiles, skiprows=1, header=None).values
        dset = list(list(float(elem) for elem in row) for row in df)
        vFaultLocation[i] = nFirstFault
        vStatementCount[i] = df.shape[0]
        FaultVersionsDict[str(i)] = df
        i = i + 1
    return [vFaultLocation, vStatementCount, FaultVersionsDict]


def safeDiv(left, right):
    try: return left / right
    except ZeroDivisionError: return 0

pset = gp.PrimitiveSet("MAIN", 4)
pset.addPrimitive(numpy.add, 2, name="gp_add")
pset.addPrimitive(numpy.subtract, 2, name="gp-vsub")
pset.addPrimitive(numpy.multiply, 2, name="gp_mul")
pset.addPrimitive(numpy.sqrt, 1, name="gp_sqrt")
pset.addPrimitive(safeDiv, 2, name="gp_div")
pset.addPrimitive(numpy.negative, 1, name="gp_neg")
pset.addPrimitive(numpy.cos, 1, name="gp_cos")
pset.addPrimitive(numpy.sin, 1, name="gp_sin")


#pset.addEphemeralConstant("rand101", lambda: random.randint(-1, 1))
pset.addEphemeralConstant("rand",lambda: random.random() * 100)
pset.renameArguments(ARG0='EP', ARG1="EF", ARG2="NP", ARG3="NF")

creator.create("FitnessMin", base.Fitness, weights=(1.0,))
creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMin)

toolbox = base.Toolbox()
toolbox.register("expr", gp.genHalfAndHalf, pset=pset, min_=1, max_=2)
toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.expr)
toolbox.register("population", tools.initRepeat, gp.PrimitiveTree, toolbox.individual)
toolbox.register("compile", gp.compile, pset=pset)

def eval_func(individual):
    #F = gp.compileADF(expr=individual, psets=pset)
    F = toolbox.compile(expr=individual)
    #F = gp.compile(expr=individual, pset=pset)
    fit = []
    for version in range(len(numberOfversion)):
    #for version in numberOfversion:
        spectrum = FaultVersionsDict[str(numberOfversion[version])]
        EP = spectrum[:, 0]
        EF = spectrum[:, 1]
        NP = spectrum[:, 2]
        NF = spectrum[:, 3]
        #print(spectrum)
        susp_v = eval(F)
        sortedSusp_v = -np.sort(-susp_v)
        faultLocation = int(vFaultLocation[numberOfversion[version]])
        susForFault = susp_v[faultLocation]
        tieCount = np.where(sortedSusp_v == susForFault)
        # firstTie = tieCount[0].min() + 1 #zero-based
        LastTie = tieCount[0].max() + 1  # the last index of a tie of faulty statement
        faultPosinRank = LastTie
        currentFit = 100 - (faultPosinRank / vStatementCount[numberOfversion[version]]) * 100
        fit.append(currentFit)
        # sortedIndexList = list(np.argsort(susp_v)[::-1])
        # faultPosinRank = sortedIndexList.index(vFaultLocation[numberOfversion[version]])
        # fit.append(100 - (faultPosinRank / vStatementCount[numberOfversion[version]]) * 100)
    avgFiteness = np.mean(fit)
    return avgFiteness


toolbox.register("evaluate", eval_func)
toolbox.register("select", tools.selAutomaticEpsilonLexicase)
toolbox.register("mate", gp.cxOnePoint)
toolbox.register("expr_mut", gp.genFull, min_=0, max_=2)
toolbox.register("mutate", gp.mutUniform, expr=toolbox.expr_mut, pset=pset)

toolbox.decorate("mate", gp.staticLimit(key=operator.attrgetter("height"), max_value=17))
toolbox.decorate("mutate", gp.staticLimit(key=operator.attrgetter("height"), max_value=17))


def main():
    random.seed(318)

    pop = toolbox.population(n=300)
    hof = tools.HallOfFame(1)

    stats_fit = tools.Statistics(lambda ind: ind.fitness.values)
    stats_size = tools.Statistics(len)
    mstats = tools.MultiStatistics(fitness=stats_fit, size=stats_size)
    mstats.register("avg", numpy.mean)
    mstats.register("std", numpy.std)
    mstats.register("min", numpy.min)
    mstats.register("max", numpy.max)

    pop, log = algorithms.eaSimple(pop, toolbox, 0.5, 0.1, 40, stats=mstats,
                                   halloffame=hof, verbose=True)
    # print log
    return pop, log, hof


def main_run(outputFolder):
    if os.path.exists(outputFolder) is False:
        os.mkdir(outputFolder)
    outputFile_i = os.path.join(outputFolder, "formula.csv")
    outputFile_v = os.path.join(outputFolder, "VersionSamples.csv")
    outputFile_f = os.path.join(outputFolder, "Fiteness.csv")
    file_v = open(outputFile_v, "w")
    file_f = open(outputFile_f, "w")
    with open(outputFile_i, "w") as file:
        global numberOfversion
        for i in range(30):
            numberOfversion = random.sample(range(0, 255), 120)
            #numberOfversion = random.sample(range(0, 92), 20)  # randomly choose 20 samples from 92 faulty versions
            file_v.write(str(numberOfversion) + "\n")
            file_v.flush()

            Fitness, RawScore, Formula = main()
            file.write(Formula)
            file_f.write(str(Fitness) + "," + str(RawScore) + "\n")
            file_f.flush()
            print(i)
            file.flush()
        file.close()
        file_v.close()


if __name__ == '__main__':
    global numberOfversion
    #print(datafile())
    vFaultLocation, vStatementCount, FaultVersionsDict = readcsv()
    #print(readcsv())
    main_run("C:\defect4j\TinyGP")
    main()
    #print(eval_func(individual=readcsv()))

标签: pythondeap

解决方案


推荐阅读