Commit 8e7da569 authored by militarpancho
Browse files

Version 4.1: Added NaiveBayes Classifier

parent 227cc07d
import nltk
import csv
from itertools import islice
from nltk.tokenize import word_tokenize
from senpy.plugins import SentimentPlugin, ShelfMixin
from senpy.models import Sentiment
class NaiveBayesPlugin(SentimentPlugin, ShelfMixin):
    """Sentiment plugin backed by NLTK's ``NaiveBayesClassifier``.

    The classifier is trained on the tab-separated corpus found at
    ``self.corpora_path`` using bag-of-words presence features, and is
    looked up under the ``'NaiveBayesClassifier'`` key so that training is
    skipped when a trained model is already stored.

    NOTE(review): this class was restored from a copy in which all
    indentation and several statements had been lost. Every reconstructed
    spot is marked ``NOTE(review)`` below and must be confirmed against the
    real file before merging.
    """

    def _labelize(self, polarity):
        """Map a numeric polarity score to a class label.

        Args:
            polarity: Any value accepted by ``float()``.

        Returns:
            ``'pos'`` when the score is greater than or equal to zero,
            ``'neg'`` otherwise.

        Raises:
            ValueError: If ``polarity`` cannot be converted to a float.
        """
        return 'pos' if float(polarity) >= 0 else 'neg'

    def _read_corpus(self):
        """Return the training corpus as a list of ``(tokens, label)`` pairs."""
        samples = []
        with open(self.corpora_path) as f:
            vader = csv.reader(f, delimiter='\t')
            for row in vader:
                # NOTE(review): the loop body was lost in extraction. This
                # assumes each row is (id, score, text) -- confirm the column
                # order against the corpus file.
                samples.append(
                    (word_tokenize(row[2]), self._labelize(row[1])))
        return samples

    def _features(self, tokens):
        """Build the word-presence feature dict for one tokenised text.

        Tokens are lower-cased before the lookup because the vocabulary in
        ``self._all_words`` is lower-cased; comparing raw tokens against it
        would silently miss every capitalised word.
        """
        # A set makes each membership test O(1) instead of scanning a list.
        present = set(token.lower() for token in tokens)
        return {word: (word in present) for word in self._all_words}

    def activate(self):
        """Prepare the vocabulary and the classifier, training if needed."""
        # NOTE(review): only the trailing "'punkt')" of this statement
        # survived; a plain tokenizer-model download is assumed.
        nltk.download('punkt')

        # The vocabulary is rebuilt on every activation because
        # analyse_entry needs it even when the classifier is not retrained.
        train = self._read_corpus()
        self._all_words = set(word.lower()
                              for passage in train for word in passage[0])

        # NOTE(review): the object being indexed was lost in extraction;
        # ``self.sh`` (presumably the store provided by ShelfMixin) is
        # assumed -- confirm.
        if 'NaiveBayesClassifier' not in self.sh:
            data_Bayes = [(self._features(x[0]), x[1]) for x in train]
            classifier = nltk.NaiveBayesClassifier.train(data_Bayes)
            self.sh['NaiveBayesClassifier'] = classifier
        self._NaiveBayesClassifier = self.sh['NaiveBayesClassifier']

    def analyse_entry(self, entry, params):
        """Classify the text of ``entry`` and yield the entry.

        Args:
            entry: Object exposing ``get("text", ...)``.
            params: Request parameters; not used by this method.

        Yields:
            The same ``entry`` that was passed in.
        """
        text = entry.get("text", None)
        # Tokenise once; the original re-tokenised for every vocabulary word.
        features = self._features(word_tokenize(text.lower()))
        result = self._NaiveBayesClassifier.classify(features)

        polarity = "marl:Neutral"
        polarity_value = 0
        if result == 'pos':
            polarity = "marl:Positive"
            polarity_value = self.maxPolarityValue
        elif result == 'neg':
            polarity = "marl:Negative"
            polarity_value = self.minPolarityValue

        sentiment = Sentiment({
            "marl:hasPolarity": polarity,
            "marl:polarityValue": polarity_value
        })
        # NOTE(review): the statements between building the Sentiment and
        # ``yield entry`` were lost; attaching it to the entry is assumed,
        # and any provenance call that may have been here is not restored.
        entry.sentiments.append(sentiment)
        yield entry
name: sentiment-naivebayes
module: sentiment-naivebayes
description: Sentiment classifier based on NLTK's NaiveBayes classifier
author: "@militarpancho"
version: '1.1'
url: ""
requirements: {nltk}
maxPolarityValue: "1"
minPolarityValue: "-1"
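# NOTE(review): the mapping keys below were missing from this copy and are
# restored by assumption -- confirm against the plugin definition schema.
extra_params:
  language:
    aliases:
      - language
      - l
    required: false
    options:
      - en
    default: en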
corpora_path: "/data/vader/tweets_GroundTruth.txt"
download_path: "/data/nltk_data"
\ No newline at end of file
