python - python 一个附加到三个线程的目标函数
问题描述
我有一个目标函数。我创建了三个线程并将这个单一目标分配给每个线程。我将同一数据集的不同切片作为输入参数传递给线程。我还将一个队列传递给线程以获取返回值。在线程内部,队列正在被填充。
我的问题是在 t.join 之后,我得到了所有三个线程的空队列。
问题:
- 是否可以将相同的目标函数传递给多个线程
- 如果以上都可以,为什么我没有从队列中取回物品
目标函数
import pandas as pd
import numpy as np
import threading
import queue
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import CountVectorizer
class newtxtfeat:
def __init__(self):
self.gene_can_list = ['APC','ATM','BMPR1A','BRCA1','BRCA2','CDK4','CDKN2A','CHK2','CREBBP','EGFR','EP300','ETV6','FHIT','FLT3','HRAS','KIT','MET','MLH1','MLL','NTR3','NTRK1','PAX8','PDGFRA','PPARγ','PRCC','PRKAR1A','PTEN','RET','RUNXBP2','STK11','TFE3','TGF-β','TGF-βRII','TP53','WWOX','PALB2','CHEK2','CDH1','BARD1','RAD51C','RAD51D','NBN','NF1']
self.df_new_text_feat= pd.DataFrame()
self.data_text = None
def load_data(self):
"""
training_text is part of the personel cancer detection dataset that can be downloaded from kaggle
"""
self.data_text =pd.read_csv("../data/training_text",sep="\|\|",engine="python",names=["ID","TEXT"],skiprows=1)
self.genr_canc_dict = None
def fnd_canc_wrd(self,gn_cn_dt,col, tkn, canctcnt, cancntcnt):
return canctcnt, cancntcnt
def crea_txt_ft(self,dt_txt_smp, gncanlst,df_new_txtft, opqueue):
for index, row in dt_txt_smp.iterrows():
corpora = [row['TEXT']]
# instantiate the vectorizer object
cvec = CountVectorizer(stop_words='english', lowercase=False )
# convert the documents into a document-term matrix
wm = cvec.fit_transform(corpora)
# retrieve the terms found in the corpora
tokens = cvec.get_feature_names()
# the following counts can be intialized to any constant
canc_tech_count = 0
canc_nontech_count = 0
gene_count = 0
protin_count = 0
mutant_count = 0
tumor_count = 0
leukemia_count = 0
amino_count = 0
spec_gene_count = 0
for i in range(0,len(tokens)):
onechar = tokens[i][0]
if onechar == 'a':
canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_a', tokens[i],canc_tech_count,canc_nontech_count)
elif onechar == 'b':
canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_b', tokens[i],canc_tech_count,canc_nontech_count)
elif onechar == 'c':
canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_c', tokens[i],canc_tech_count,canc_nontech_count)
elif onechar == 'd':
canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_d', tokens[i],canc_tech_count,canc_nontech_count)
elif onechar == 'e':
canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_e', tokens[i],canc_tech_count,canc_nontech_count)
elif onechar == 'f':
canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_f', tokens[i],canc_tech_count,canc_nontech_count)
elif onechar == 'g':
canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_g', tokens[i],canc_tech_count,canc_nontech_count)
elif onechar == 'h':
canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_h', tokens[i],canc_tech_count,canc_nontech_count)
elif onechar == 'i':
canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_i', tokens[i],canc_tech_count,canc_nontech_count)
elif onechar == 'j':
canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_j', tokens[i],canc_tech_count,canc_nontech_count)
elif onechar == 'k':
canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_k', tokens[i],canc_tech_count,canc_nontech_count)
elif onechar == 'l':
canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_l', tokens[i],canc_tech_count,canc_nontech_count)
elif onechar == 'm':
canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_m', tokens[i],canc_tech_count,canc_nontech_count)
elif onechar == 'n':
canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_n', tokens[i],canc_tech_count,canc_nontech_count)
elif onechar == 'o':
canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_o', tokens[i],canc_tech_count,canc_nontech_count)
elif onechar == 'p':
canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_p', tokens[i],canc_tech_count,canc_nontech_count)
elif onechar == 'q':
canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_q', tokens[i],canc_tech_count,canc_nontech_count)
elif onechar == 'r':
canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_r', tokens[i],canc_tech_count,canc_nontech_count)
elif onechar == 's':
canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_s', tokens[i],canc_tech_count,canc_nontech_count)
elif onechar == 't':
canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_t', tokens[i],canc_tech_count,canc_nontech_count)
elif onechar == 'u':
canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_u', tokens[i],canc_tech_count,canc_nontech_count)
elif onechar == 'v':
canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_v', tokens[i],canc_tech_count,canc_nontech_count)
elif onechar == 'w':
canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_w', tokens[i],canc_tech_count,canc_nontech_count)
elif onechar == 'x':
canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_x', tokens[i],canc_tech_count,canc_nontech_count)
elif onechar == 'y':
canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_y', tokens[i],canc_tech_count,canc_nontech_count)
elif onechar == 'z':
canc_tech_count,canc_nontech_count = self.fnd_canc_wrd(self.genr_canc_dict, 'col_z', tokens[i],canc_tech_count,canc_nontech_count)
else:
canc_nontech_count += 1
if tokens[i] in gncanlst:
spec_gene_count += 1
if tokens[i] == 'gene':
gene_count += 1
elif tokens[i] == 'protein':
protin_count += 1
elif tokens[i] in ['mutation','mutant']:
mutant_count += 1
elif tokens[i] == 'tumor':
tumor_count += 1
elif tokens[i] == 'leukemia':
leukemia_count += 1
elif tokens[i] == 'amino':
amino_count += 1
#print(canc_tech_count)
#print('not found',canc_nontech_count )
#print(i)
df_new_txtft =df_new_txtft.append({'canc_tech_cnt':canc_tech_count,'canc_tech_rto': canc_tech_count / i, \
'tot_wrds':i,'canc_gene_cnt':spec_gene_count,'gene_cnt':gene_count,'protin_cnt':protin_count,\
'muta_cnt':mutant_count,'tumor_cnt':tumor_count,'leuk_cnt':leukemia_count,'amino_cnt':amino_count}, ignore_index=True)
opqueue.put(df_new_txtft)
if __name__ == "__main__" :
newfeat = newtxtfeat()
newfeat.load_data()
newfeat.q1df = queue.Queue()
newfeat.q2df = queue.Queue()
newfeat.q3df = queue.Queue()
newfeat.df_new_txtft_1 = pd.DataFrame()
newfeat.df_new_txtft_2 = pd.DataFrame()
newfeat.df_new_txtft_3 = pd.DataFrame()
newfeat.df_text_1 = newfeat.data_text[1:11]
newfeat.df_text_2 = newfeat.data_text[11:21]
newfeat.df_text_3 = newfeat.data_text[21:31]
lst1 = newfeat.gene_can_list
lst2 = newfeat.gene_can_list
lst3 = newfeat.gene_can_list
t1 = threading.Thread(target=newfeat.crea_txt_ft, args=(newfeat.df_text_1, lst1 ,newfeat.df_new_txtft_1,newfeat.q1df))
t2 = threading.Thread(target=newfeat.crea_txt_ft, args=(newfeat.df_text_2, lst2 ,newfeat.df_new_txtft_2,newfeat.q2df))
t3 = threading.Thread(target=newfeat.crea_txt_ft, args=(newfeat.df_text_3, lst3 ,newfeat.df_new_txtft_3,newfeat.q3df))
t1.start()
t2.start()
t3.start()
t1.join()
t2.join()
t3.join()
while not newfeat.q1df.empty():
newfeat.df_new_txtft_1 = newfeat.q1df.get()
while not newfeat.q2df.empty():
newfeat.df_new_txtft_2 = newfeat.q2df.get()
while True:
if newfeat.q3df.empty():
print('Third Queue is empty and closed . Exiting thread.')
break
try:
newfeat.df_new_txtft_3 = newfeat.q3df.get()
except:
continue
if (not newfeat.df_new_txtft_1 ) and (not newfeat.df_new_txtft_2) and (not newfeat.df_new_txtft_3) :
newfeat.df_new_text_feat = pd.concat([newfeat.df_new_txtft_1,newfeat.df_new_txtft_2,newfeat.df_new_txtft_3],ignore_index=True)
print(newfeat.df_new_text_feat.info())
print(newfeat.df_new_text_feat.shape)
我希望三个队列从线程返回更新值。
实际上,我正在返回一个空队列。
解决方案
推荐阅读
- woocommerce - 限制woocommerce类别描述
- arrays - 如何在一个数组中推送多个图像?
- vue.js - 如何将服务工作者添加到 Vue.js 项目?
- c++ - 无法在分配 flex/bison 中将 'char*' 转换为 'float'
- python - 将列的值从列表更改为熊猫系列
- reactjs - 处理事务时出现 VM 异常:测试 createRequest 方法时恢复错误
- javacard - 为什么我无法在 Javacard 中删除或替换插入的带有 KeyVersion=0x70 的 Receipt Key?
- performance - 计算机中最有限和最昂贵的资源是什么?
- python-3.x - 尽管我的 CPU 支持 AVX,但 Tensorflow 2.4 仍无法工作
- c - cProgramming:误解引用指向 int 结构和嵌套结构(也是 int)的元素的指针