首页 > 解决方案 > python 一个附加到三个线程的目标函数

问题描述

我有一个目标函数。我创建了三个线程并将这个单一目标分配给每个线程。我将同一数据集的不同切片作为输入参数传递给线程。我还将一个队列传递给线程以获取返回值。在线程内部,队列正在被填充。

我的问题是在 t.join 之后,我得到了所有三个线程的空队列。

问题:

  1. 是否可以将相同的目标函数传递给多个线程
  2. 如果以上都可以,为什么我没有从队列中取回物品

目标函数

import pandas as pd
import numpy as np
import threading
import queue

from nltk.corpus import stopwords
from sklearn.feature_extraction.text import CountVectorizer

class newtxtfeat:

    def __init__(self):

        self.gene_can_list = ['APC','ATM','BMPR1A','BRCA1','BRCA2','CDK4','CDKN2A','CHK2','CREBBP','EGFR','EP300','ETV6','FHIT','FLT3','HRAS','KIT','MET','MLH1','MLL','NTR3','NTRK1','PAX8','PDGFRA','PPARγ','PRCC','PRKAR1A','PTEN','RET','RUNXBP2','STK11','TFE3','TGF-β','TGF-βRII','TP53','WWOX','PALB2','CHEK2','CDH1','BARD1','RAD51C','RAD51D','NBN','NF1']
        self.df_new_text_feat= pd.DataFrame()
        self.data_text = None

    def load_data(self):
        """
            training_text is part of the personel cancer detection dataset that can be downloaded from kaggle
        """
        self.data_text =pd.read_csv("../data/training_text",sep="\|\|",engine="python",names=["ID","TEXT"],skiprows=1)

        self.genr_canc_dict = None

    def fnd_canc_wrd(self,gn_cn_dt,col, tkn, canctcnt, cancntcnt):

        return canctcnt, cancntcnt

    def crea_txt_ft(self,dt_txt_smp, gncanlst,df_new_txtft, opqueue):

        for index, row in dt_txt_smp.iterrows():

            corpora = [row['TEXT']]

            # instantiate the vectorizer object
            cvec = CountVectorizer(stop_words='english', lowercase=False )

            # convert the documents into a document-term matrix
            wm = cvec.fit_transform(corpora)

            # retrieve the terms found in the corpora
            tokens = cvec.get_feature_names()

            # the following counts can be intialized to any constant
            canc_tech_count = 0
            canc_nontech_count = 0
            gene_count = 0
            protin_count = 0
            mutant_count = 0
            tumor_count = 0
            leukemia_count = 0
            amino_count = 0
            spec_gene_count = 0


            for i in range(0,len(tokens)):
                onechar = tokens[i][0]
                if onechar == 'a':     
                    canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_a', tokens[i],canc_tech_count,canc_nontech_count)

                elif onechar == 'b':        

                    canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_b', tokens[i],canc_tech_count,canc_nontech_count)

                elif onechar == 'c':     
                    canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_c', tokens[i],canc_tech_count,canc_nontech_count)        

                elif onechar == 'd':        
                    canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_d', tokens[i],canc_tech_count,canc_nontech_count)        

                elif onechar == 'e':        
                    canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_e', tokens[i],canc_tech_count,canc_nontech_count)        

                elif onechar == 'f':        
                    canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_f', tokens[i],canc_tech_count,canc_nontech_count)        

                elif onechar == 'g':        
                    canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_g', tokens[i],canc_tech_count,canc_nontech_count)        

                elif onechar == 'h':        
                    canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_h', tokens[i],canc_tech_count,canc_nontech_count)        

                elif onechar == 'i':        
                    canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_i', tokens[i],canc_tech_count,canc_nontech_count)        

                elif onechar == 'j':        
                    canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_j', tokens[i],canc_tech_count,canc_nontech_count)


                elif onechar == 'k':        
                    canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_k', tokens[i],canc_tech_count,canc_nontech_count)        

                elif onechar == 'l':        
                    canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_l', tokens[i],canc_tech_count,canc_nontech_count)        

                elif onechar == 'm':        
                    canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_m', tokens[i],canc_tech_count,canc_nontech_count)        

                elif onechar == 'n':        
                    canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_n', tokens[i],canc_tech_count,canc_nontech_count)        


                elif onechar == 'o':        
                    canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_o', tokens[i],canc_tech_count,canc_nontech_count)


                elif onechar == 'p':        
                    canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_p', tokens[i],canc_tech_count,canc_nontech_count)

                elif onechar == 'q':        
                    canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_q', tokens[i],canc_tech_count,canc_nontech_count)

                elif onechar == 'r':        
                    canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_r', tokens[i],canc_tech_count,canc_nontech_count)        

                elif onechar == 's':        
                    canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_s', tokens[i],canc_tech_count,canc_nontech_count)        

                elif onechar == 't':        
                    canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_t', tokens[i],canc_tech_count,canc_nontech_count)

                elif onechar == 'u':                
                    canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_u', tokens[i],canc_tech_count,canc_nontech_count)        

                elif onechar == 'v':        
                    canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_v', tokens[i],canc_tech_count,canc_nontech_count)        

                elif onechar == 'w':        
                    canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_w', tokens[i],canc_tech_count,canc_nontech_count)        

                elif onechar == 'x':        
                    canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_x', tokens[i],canc_tech_count,canc_nontech_count)        

                elif onechar == 'y':        
                    canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_y', tokens[i],canc_tech_count,canc_nontech_count)        

                elif onechar == 'z':        
                    canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_z', tokens[i],canc_tech_count,canc_nontech_count)        

                else:

                    canc_nontech_count += 1

                if tokens[i] in gncanlst:
                    spec_gene_count += 1

                if tokens[i] == 'gene':
                    gene_count += 1
                elif tokens[i] == 'protein':
                    protin_count += 1
                elif tokens[i] in ['mutation','mutant']:
                    mutant_count += 1
                elif tokens[i] == 'tumor':
                    tumor_count  += 1
                elif tokens[i] == 'leukemia':
                    leukemia_count += 1            
                elif tokens[i] == 'amino':
                    amino_count += 1


                #print(canc_tech_count)  
                #print('not found',canc_nontech_count )
                #print(i)
            df_new_txtft =df_new_txtft.append({'canc_tech_cnt':canc_tech_count,'canc_tech_rto': canc_tech_count / i, \
            'tot_wrds':i,'canc_gene_cnt':spec_gene_count,'gene_cnt':gene_count,'protin_cnt':protin_count,\
            'muta_cnt':mutant_count,'tumor_cnt':tumor_count,'leuk_cnt':leukemia_count,'amino_cnt':amino_count}, ignore_index=True) 

        opqueue.put(df_new_txtft)

if __name__ == "__main__"   :

    newfeat = newtxtfeat()
    newfeat.load_data()

    newfeat.q1df = queue.Queue()
    newfeat.q2df = queue.Queue()
    newfeat.q3df = queue.Queue()

    newfeat.df_new_txtft_1 = pd.DataFrame()
    newfeat.df_new_txtft_2 = pd.DataFrame()
    newfeat.df_new_txtft_3 = pd.DataFrame()

    newfeat.df_text_1 = newfeat.data_text[1:11]
    newfeat.df_text_2 = newfeat.data_text[11:21]
    newfeat.df_text_3 = newfeat.data_text[21:31]

    lst1 = newfeat.gene_can_list
    lst2 = newfeat.gene_can_list
    lst3 = newfeat.gene_can_list

    t1 = threading.Thread(target=newfeat.crea_txt_ft, args=(newfeat.df_text_1, lst1 ,newfeat.df_new_txtft_1,newfeat.q1df))
    t2 = threading.Thread(target=newfeat.crea_txt_ft, args=(newfeat.df_text_2, lst2 ,newfeat.df_new_txtft_2,newfeat.q2df))
    t3 = threading.Thread(target=newfeat.crea_txt_ft, args=(newfeat.df_text_3, lst3 ,newfeat.df_new_txtft_3,newfeat.q3df))

    t1.start()
    t2.start()
    t3.start()

    t1.join()
    t2.join()
    t3.join()

    while not newfeat.q1df.empty():
        newfeat.df_new_txtft_1 = newfeat.q1df.get()

    while not newfeat.q2df.empty():
        newfeat.df_new_txtft_2 = newfeat.q2df.get()


    while True:
        if newfeat.q3df.empty():
            print('Third Queue is empty and closed . Exiting thread.')
            break
        try:
            newfeat.df_new_txtft_3 = newfeat.q3df.get()
        except:
            continue

    if (not newfeat.df_new_txtft_1 ) and  (not newfeat.df_new_txtft_2) and (not newfeat.df_new_txtft_3) :
        newfeat.df_new_text_feat = pd.concat([newfeat.df_new_txtft_1,newfeat.df_new_txtft_2,newfeat.df_new_txtft_3],ignore_index=True)

        print(newfeat.df_new_text_feat.info())
        print(newfeat.df_new_text_feat.shape)

我希望三个队列从线程返回更新值。

实际上,我正在返回一个空队列。

标签: pythonmultithreading

解决方案


推荐阅读