python - 文本的情感分析
问题描述
我有一段使用朴素贝叶斯进行情感分析的代码。它由三个文件组成:
classifier.py --> 包含一个构建朴素贝叶斯分类器的函数。
preprocessor.py --> 包含对正面和负面文本进行分词和分类的函数。
main.py --> 创建一个显示输出的界面。
当我尝试执行它们时,有时会得到奇怪的输出,例如对肯定语句给出负面结果,而对否定语句给出正面结果。这是我的代码。
classifier.py
import random
import preprocess
import nltk
def get_classifier():
    """Train a Naive Bayes classifier on the labelled data and return it.

    The samples from ``preprocess.get_data()`` are shuffled in place; the
    first 80% are used for training and the remainder for measuring
    accuracy, which is printed before the classifier is returned.
    """
    samples = preprocess.get_data()
    random.shuffle(samples)

    # 80/20 train/test split on the shuffled samples.
    cutoff = int(0.8 * len(samples))
    training, held_out = samples[:cutoff], samples[cutoff:]

    model = nltk.NaiveBayesClassifier.train(training)
    score = nltk.classify.util.accuracy(model, held_out)

    print("Generated Classifier")
    print('-'*70)
    print("Accuracy: ", score)
    return model
preprocess.py
import nltk.classify
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
# English stop-word list from NLTK, loaded once at import time; the
# create_word_features_pos / create_word_features_neg helpers filter on it.
stop_words = stopwords.words("english")
def create_word_features_pos(words):
    """Label every non-stop-word in *words* as 'positive'.

    Returns a list of ``({word: True}, 'positive')`` pairs, one for each
    word that is not in ``stop_words``, in the order the words were given.
    """
    labelled = []
    for token in words:
        if token in stop_words:
            continue
        labelled.append(({token: True}, 'positive'))
    return labelled
def create_word_features_neg(words):
    """Label every non-stop-word in *words* as 'negative'.

    Returns a list of ``({word: True}, 'negative')`` pairs, one for each
    word that is not in ``stop_words``, in the order the words were given.
    """
    labelled = []
    for token in words:
        if token in stop_words:
            continue
        labelled.append(({token: True}, 'negative'))
    return labelled
def create_word_features(words):
    """Build the feature dict used to classify a tokenized review.

    Args:
        words: tokens of the review text.

    Returns:
        A dict mapping every lexicon word (from ``positive-words.txt`` or
        ``negative-words.txt``) that also occurs in *words*, after stop-word
        removal, to ``True``.
    """
    # Fetch the stop-word list once instead of once per token, and use sets
    # so the membership tests below are O(1).
    english_stop_words = set(stopwords.words("english"))
    useful_words = {word for word in words if word not in english_stop_words}

    pos_txt = get_tokenized_file(u"positive-words.txt")
    neg_txt = get_tokenized_file(u"negative-words.txt")

    features = {word: True for word in pos_txt if word in useful_words}
    # The training samples are built as ``{word: True}`` for positive AND
    # negative words alike, so a negative word present in the review must
    # also be encoded as True.  Encoding it as False gives the classifier a
    # feature value it never saw in training, so the word contributes no
    # usable negative evidence and predictions come out wrong.
    features.update({word: True for word in neg_txt if word in useful_words})
    return features
def get_tokenized_file(file):
    """Read the text file at *file* and return its contents as word tokens.

    Args:
        file: path of the text file to read.

    Returns:
        The list of tokens produced by ``word_tokenize`` for the file's text.
    """
    # The context manager closes the handle as soon as the text is read;
    # the previous bare ``open(...).read()`` left it open.
    with open(file, 'r') as handle:
        return word_tokenize(handle.read())
def get_data():
    """Load both word lists and return the labelled samples.

    Reads ``negative-words.txt`` and then ``positive-words.txt``, printing a
    progress line before each, and returns the positive samples followed by
    the negative samples.
    """
    print("Collecting Negative Words")
    negative_samples = create_word_features_neg(
        get_tokenized_file(u"negative-words.txt")
    )

    print("Collecting Positive Words")
    positive_samples = create_word_features_pos(
        get_tokenized_file(u"positive-words.txt")
    )

    return positive_samples + negative_samples
def process(data):
    """Tokenize *data* with ``word_tokenize`` and return lower-cased tokens."""
    lowered = []
    for token in word_tokenize(data):
        lowered.append(token.lower())
    return lowered
main.py
from preprocess import create_word_features, create_word_features_neg
from preprocess import create_word_features_pos, process
from classifier import get_classifier
import nltk.classify
from tkinter import *
# Build the main window: a top frame holding the prompt label and the text
# box, and a bottom frame that later receives the button and result labels.
print("Designing UI")
root = Tk()
root.wm_title('Sentiment Analysis Application')
top_frame = Frame(root)
top_frame.pack()
bottom_frame = Frame(root)
bottom_frame.pack(side=BOTTOM)
l1 = Label(top_frame, text='Enter a review:')
l1.pack(side=LEFT)
# Text box the review is typed into; main_op reads its contents.
w = Text(top_frame, height=4 )
w.pack(side=LEFT)
print("UI COMPLETE")
# Train the classifier once at start-up; main_op reuses it on every click.
clf = get_classifier()
def main_op():
    """Classify the text currently in the review box and display the verdict.

    Reads the whole contents of the ``w`` text widget, tokenizes it, builds
    the feature dict, asks ``clf`` for a label, and packs a new label with
    the result into ``bottom_frame``.
    """
    raw_review = w.get('1.0', END)
    tokens = process(raw_review)
    features = create_word_features(tokens)
    verdict = 'review is ' + clf.classify(features)
    Label(bottom_frame, text=verdict).pack()
# Wire the 'Analyse' button to main_op and hand control to the Tk event loop.
button = Button(bottom_frame, text='Analyse', command=main_op )
button.pack(side=BOTTOM)
root.mainloop()
我将在 Anaconda Spyder 中运行此代码。请帮我整理一下这段代码以获得准确的输出。谢谢你。
解决方案
推荐阅读
- qml - QML 绑定循环在未知文件中检测到运行时警告
- c - 为 Postgres 11 创建扩展名导致“无法访问文件 $libdir/...”
- java - 自定义控制台输出 - Surefire 插件
- java - 更新查询spring数据JPA修改所有列
- python - Python function with "Position only parameter"
- python - Python - 保存网络抓取的文件 - 波兰语字符编码错误
- amazon-web-services - AWS CodeDeploy 的 GitHub 环境变量
- android - 自动生成的构造函数 Kotlin
- c# - 添加对 System.IO.Compression、System.IO.Compression.ZipFile 的 NuGet 引用,但编译器使用 Microsoft.NET.Build.Extensions 文件夹?
- npm - 列出所有 npm_package_vars 的命令