particle-swarm - 在粒子群优化程序中使用时,keyerror 'micro avg' 无法在 python 中运行
问题描述
# ------------------------------------------------------------------------------+
#
# Nathan A. Rooy
# Simple Particle Swarm Optimization (PSO) with Python
# July, 2016
#
# ------------------------------------------------------------------------------+
from __future__ import division
# --- IMPORT DEPENDENCIES ------------------------------------------------------+
import csv
import re
import numpy as np
import pandas as pd
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.stem.snowball import SnowballStemmer
from nltk.stem import WordNetLemmatizer
from sklearn import metrics, tree
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB, BernoulliNB
import random
import math
# --- COST FUNCTION ------------------------------------------------------------+
# function we are attempting to optimize (minimize)
def func1(x):
total = 0
for i in range(len(x)):
total += x[i]**2
return total
# --- MAIN ---------------------------------------------------------------------+
class Particle:
def __init__(self, x0):
self.position_i = [] # particle position
self.velocity_i = [] # particle velocity
self.pos_best_i = [] # best position individual
self.err_best_i = -1 # best error individual
self.err_i = -1 # error individual
for i in range(0, num_dimensions):
self.velocity_i.append(random.uniform(-1, 1))
self.position_i.append(x0[i])
# evaluate current fitness
def evaluate(self, costFunc):
self.err_i = costFunc(self.position_i)
# check to see if the current position is an individual best
if self.err_i < self.err_best_i or self.err_best_i == -1:
self.pos_best_i = self.position_i.copy()
self.err_best_i = self.err_i
# update new particle velocity
def update_velocity(self, pos_best_g):
# constant inertia weight (how much to weigh the previous velocity)
w = 0.5
c1 = 1 # cognative constant
c2 = 2 # social constant
for i in range(0, num_dimensions):
r1 = random.random()
r2 = random.random()
vel_cognitive = c1*r1*(self.pos_best_i[i]-self.position_i[i])
vel_social = c2*r2*(pos_best_g[i]-self.position_i[i])
self.velocity_i[i] = w*self.velocity_i[i]+vel_cognitive+vel_social
# update the particle position based off new velocity updates
def update_position(self, bounds):
for i in range(0, num_dimensions):
# print(self.position_i[i])
# print(self.velocity_i[i])
self.position_i[i] = self.position_i[i] + self.velocity_i[i]
# adjust maximum position if necessary
if self.position_i[i] > bounds[i][1]:
self.position_i[i] = bounds[i][1]
# adjust minimum position if neseccary
if self.position_i[i] < bounds[i][0]:
self.position_i[i] = bounds[i][0]
class PSO():
def __init__(self, costFunc, x0, bounds, num_particles, maxiter):
global num_dimensions
num_dimensions = len(x0)
err_best_g = -1 # best error for group
pos_best_g = [] # best position for group
# establish the swarm
swarm = []
for i in range(0, num_particles):
if i > num_particles/2:
x = [random.randint(x0[0], bounds[0][1]), random.randint(
x0[1], bounds[1][1]), random.randint(x0[2], bounds[2][1]), random.random()]
else:
x = [random.randint(bounds[0][0], x0[0]), random.randint(
bounds[1][0], x0[1]), random.randint(bounds[2][0], x0[2]), random.random()]
swarm.append(Particle(x))
# begin optimization loop
i = 0
while i < maxiter:
# print i,err_best_g
# cycle through particles in swarm and evaluate fitness
print("Iteration:", i)
for j in range(0, num_particles):
swarm[j].evaluate(costFunc)
# determine if current particle is the best (globally)
if swarm[j].err_i < err_best_g or err_best_g == -1:
pos_best_g = list(swarm[j].position_i)
err_best_g = float(swarm[j].err_i)
# cycle through swarm and update velocities and position
for j in range(0, num_particles):
swarm[j].update_velocity(pos_best_g)
swarm[j].update_position(bounds)
i += 1
# print final results
print('FINAL:')
print(pos_best_g)
print(err_best_g)
print('Accuracy:', 1-err_best_g)
# --- RUN ----------------------------------------------------------------------+
df = pd.read_csv('dataset.tsv', sep='\t', quoting=csv.QUOTE_NONE, dtype=str, encoding='utf-8',
header=None, names=["instance", "text", "id", "sentiment", "is_sarcastic"])
###
""" Functions for text pre-processing """
def remove_URL(sample):
"""Remove URLs from a sample string"""
return re.sub(r"http\S+", " ", sample)
def remove_punctuation(sample):
"""Remove punctuations from a sample string"""
return re.sub(r'[^\w\s\&\#\@\$\%\_]', '', sample)
def myTokenizer(sample):
"""Customized tokenizer"""
new_words = []
words = sample.split(' ')
new_words = [word for word in words if len(
word) >= 2 and not word.startswith('au') and not word.startswith('#aus')]
return new_words
def remove_stopwords_NLTK(sample):
"""Remove stopwords using NLTK"""
stopWords = set(stopwords.words('english'))
words = [w for w in sample.split(' ') if len(w) >= 2]
filteredText = ""
for word in words:
if word not in stopWords:
filteredText = filteredText + word + " "
return filteredText.rstrip()
def remove_digits(input_text):
return re.sub('\d+', '', input_text)
def porter_stem(sample):
"""Stemming"""
words = [w for w in sample.split(' ') if len(w) >= 2]
ps = PorterStemmer()
stemmed_text = ""
for word in words:
stemmed_text = stemmed_text + ps.stem(word) + " "
return stemmed_text.rstrip()
def myPreprocessor(sample):
"""Customized preprocessor"""
sample = remove_URL(sample)
sample = sample.lower()
sample = remove_stopwords_NLTK(sample)
sample = remove_punctuation(sample)
sample = remove_digits(sample)
sample = porter_stem(sample)
return sample
""" Data creation """
text_data = np.array([])
# Read tweets
for text in df.text:
text_data = np.append(text_data, text)
# creating target classes
Y = np.array([])
for text in df.sentiment:
Y = np.append(Y, text)
# First 1500 for training set, last 500 for test set
X_train_, X_test_, y_train, y_test = train_test_split(
text_data, Y, test_size=0.25, shuffle=False)
def classifierCost(n):
print("Trying: ", n[0], n[1], n[2], n[3])
count = CountVectorizer(preprocessor=myPreprocessor, tokenizer=myTokenizer,
max_features=round(n[0]), ngram_range=(1, round(n[1])), min_df=round(n[2]))
# count = TfidfVectorizer(preprocessor=myPreprocessor, tokenizer=myTokenizer,
# max_features=round(n[0]), ngram_range=(1, round(n[1])), min_df=round(n[2]), use_idf=True)
X_train = count.fit_transform(X_train_).toarray()
X_test = count.transform(X_test_).toarray()
clf = MultinomialNB(alpha=n[3], fit_prior=True)
#clf = BernoulliNB(alpha=1.0, fit_prior=True)
model = clf.fit(X_train, y_train)
y_pred = model.predict(X_test)
report = classification_report(y_test, y_pred, output_dict=True)
y_pred = model.predict(X_train)
report1 = classification_report(y_train, y_pred, output_dict=True)
return 1 - 2*(report['micro avg']['f1-score']*report1['micro avg']['f1-score'])/(report['micro avg']['f1-score']+report1['micro avg']['f1-score'])
# initial starting location [x1,x2...]
initial = [400, 1, 5, 0.5]
# input bounds [(x1_min,x1_max),(x2_min,x2_max)...]
bounds = [(100, 1500), (1, 3), (0, 5), (0.3, 1)]
PSO(classifierCost, initial, bounds, num_particles=20, maxiter=10)
所以这些错误是这样的:尝试新的跨平台 PowerShell https://aka.ms/pscore6
PS D:\TUGAS AKHIR\Coba2 aja\PSO coba2> & "c:/program files/python39/python.exe" "d:/TUGAS AKHIR/Coba2 aja/PSO coba2/swarmSentiment.py" 迭代:0 尝试:211 1 0 0.9045385702790308 Traceback(最近一次调用最后):文件“d:\TUGAS AKHIR\Coba2 aja\PSO coba2\swarmSentiment.py”,第 237 行,在 PSO(分类器成本,初始,边界,num_particles=20,maxiter=10)文件“d:\TUGAS AKHIR\Coba2 aja\PSO coba2\swarmSentiment.py”,第 116 行,在init swarm[j].evaluate(costFunc) 文件“d:\TUGAS AKHIR\Coba2 aja\PSO coba2\swarmSentiment.py”,第 53 行,在评估 self.err_i = costFunc(self.position_i) 文件“d:\TUGAS AKHIR \Coba2 aja\PSO coba2\swarmSentiment.py",第 230 行,在分类器成本中返回 1 - 2*(report['micro avg']['f1-score']*report1['micro avg']['f1-score '])/(report['micro avg']['f1-score']+report1['micro avg']['f1-score']) KeyError: 'micro avg' PS D:\TUGAS AKHIR\Coba2 aja \PSO coba2>
如何解决这些问题?,我不得不尝试安装数学但无法工作
解决方案
推荐阅读
- react-native - 在 Linux 上的 PhpStorm / WebStorm 上全局设置 ANDROID_HOME 变量
- javascript - 如何从一个类中获取按钮内使用的所有类名?
- android - 通过 NodeJS 与 MangoDB 反应原生
- python - Python argparse:使用带方括号的元变量?
- python - UnsupportedOperation:尝试运行 Python 应用服务器时不可写
- angular - 在 Angular Material 2 中设置底片的宽度
- c# - 类设计、多态加载方法、依赖注入
- oracle - 我们如何在 oracle 数据库表中创建一个数组,我需要在 java 中获取数据
- linux - 与 while .sh 的两个文件比较
- kernel - TensorFlow Lite 架构内核