首页 > 解决方案 > 文本的情感分析

问题描述

我有一段使用朴素贝叶斯进行情感分析的代码,共包含以下三个文件:

  1. classifier.py--> 包含构建朴素贝叶斯分类器的函数。
  2. preprocessor.py--> 包含一些对正面和负面文本进行分词和标注的函数。
  3. main.py--> 创建一个显示输出的界面。

当我运行这些文件时,有时会得到奇怪的结果:例如正面语句被判定为负面(negative),而负面语句被判定为正面(positive)。以下是我的代码。


classifier.py

import random
import preprocess
import nltk

def get_classifier():
    """Train a Naive Bayes classifier on the lexicon data and return it.

    The labelled featuresets come from ``preprocess.get_data()``. They are
    shuffled in place; the first 80% are used for training and the remaining
    20% for measuring accuracy, which is printed before the classifier is
    returned.
    """
    samples = preprocess.get_data()
    random.shuffle(samples)

    # 80/20 train/test cut on the shuffled samples.
    cut = int(0.8 * len(samples))
    training, held_out = samples[:cut], samples[cut:]

    model = nltk.NaiveBayesClassifier.train(training)
    score = nltk.classify.util.accuracy(model, held_out)

    print("Generated Classifier")
    print('-'*70)
    print("Accuracy: ", score)
    return model

preprocess.py

import nltk.classify
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

stop_words = stopwords.words("english")

def create_word_features_pos(words):
    """Return one ``({word: True}, 'positive')`` sample per non-stop-word token."""
    labelled = []
    for token in words:
        # Tokens found in the module-level stop-word list carry no signal.
        if token in stop_words:
            continue
        labelled.append(({token: True}, 'positive'))
    return labelled


def create_word_features_neg(words):
    """Return one ``({word: True}, 'negative')`` sample per non-stop-word token."""
    labelled = []
    for token in words:
        # Tokens found in the module-level stop-word list carry no signal.
        if token in stop_words:
            continue
        labelled.append(({token: True}, 'negative'))
    return labelled


def create_word_features(words):
    """Turn the tokens of a text into a featureset for the classifier.

    Only tokens that are not English stop words and that occur in one of the
    two lexicon files (``positive-words.txt`` / ``negative-words.txt``) become
    features.

    Args:
        words: iterable of string tokens of the text to classify.

    Returns:
        dict mapping each retained lexicon word to ``True``.
    """
    # Fetch the stop-word list once instead of once per token, and use sets
    # so every membership test below is constant time.
    english_stop_words = set(stopwords.words("english"))
    useful_words = {word for word in words if word not in english_stop_words}

    pos_txt = get_tokenized_file(u"positive-words.txt")
    neg_txt = get_tokenized_file(u"negative-words.txt")

    # The training samples built by create_word_features_pos/_neg always have
    # the shape {word: True}. Negative words used to be emitted here with the
    # value False, a value that never occurs in the training data, so the
    # classifier could not match them against what it learned for the
    # 'negative' label. Use True for both lexicons so that inference features
    # have the same form as the training features.
    features = {word: True for word in pos_txt if word in useful_words}
    features.update({word: True for word in neg_txt if word in useful_words})
    return features

def get_tokenized_file(file):
    """Read the text file at path *file* and return its contents as tokens.

    Args:
        file: path of the file to read (opened in text mode with the
            platform default encoding).

    Returns:
        The tokens produced by ``word_tokenize`` for the whole file content.
    """
    # The context manager closes the handle as soon as the content is read,
    # even if reading fails; previously the file object was never closed.
    with open(file, 'r') as handle:
        text = handle.read()
    return word_tokenize(text)

def get_data():
    """Assemble the labelled samples from both lexicon files.

    The negative lexicon is read first, then the positive one; the returned
    list holds the positive samples followed by the negative samples.
    """
    print("Collecting Negative Words")
    negative_samples = create_word_features_neg(
        get_tokenized_file(u"negative-words.txt")
    )

    print("Collecting Positive Words")
    positive_samples = create_word_features_pos(
        get_tokenized_file(u"positive-words.txt")
    )

    return positive_samples + negative_samples

def process(data):
    """Tokenize *data* and return the tokens converted to lower case."""
    lowered = []
    for token in word_tokenize(data):
        lowered.append(token.lower())
    return lowered

main.py

  from preprocess import create_word_features, create_word_features_neg
from preprocess import create_word_features_pos, process
from classifier import get_classifier
import nltk.classify
from tkinter import *


# Build the main window. Everything below runs at module top level, in order.
print("Designing UI")
root = Tk()
root.wm_title('Sentiment Analysis Application')

# Upper area: holds the prompt label and the text input.
top_frame = Frame(root)
top_frame.pack()

# Lower area: main_op() packs its result labels here; the button is placed
# here as well.
bottom_frame = Frame(root)
bottom_frame.pack(side=BOTTOM)

l1 = Label(top_frame, text='Enter a review:')
l1.pack(side=LEFT)

# Text box for the review; main_op() reads its contents.
w = Text(top_frame, height=4 )
w.pack(side=LEFT)

print("UI COMPLETE")
# Build the classifier once, before the event loop starts; main_op() reuses
# this same object on every call.
clf = get_classifier()

def main_op():
    """Classify the text currently in the input box and display the verdict.

    Used as the command of the 'Analyse' button. Every call packs a new
    label containing the result into ``bottom_frame``.
    """
    raw_text = w.get('1.0',END)
    features = create_word_features(process(raw_text))
    message = 'review is ' + clf.classify(features)
    result_label = Label(bottom_frame, text=message)
    result_label.pack()

# Clicking the button runs main_op; mainloop() then hands control to Tk.
button = Button(bottom_frame, text='Analyse', command=main_op )
button.pack(side=BOTTOM)

root.mainloop()

我在 Anaconda Spyder 中运行这段代码。请帮我修正这段代码,使其输出准确的结果。谢谢。

标签: python, sentiment-analysis, naivebayes

解决方案


推荐阅读