Source code for textblob.en.sentiments

"""Sentiment analysis implementations.

.. versionadded:: 0.5.0
"""
from collections import namedtuple

import nltk

from textblob.base import CONTINUOUS, DISCRETE, BaseSentimentAnalyzer
from textblob.decorators import requires_nltk_corpus
from textblob.en import sentiment as pattern_sentiment
from textblob.tokenizers import word_tokenize



[docs]
class PatternAnalyzer(BaseSentimentAnalyzer):
    """Sentiment analyzer that uses the same implementation as the
    pattern library. Returns results as a named tuple of the form:

    ``Sentiment(polarity, subjectivity, [assessments])``

    where [assessments] is a list of the assessed tokens and their
    polarity and subjectivity scores. The ``assessments`` field is only
    present when ``analyze()`` is called with ``keep_assessments=True``.
    """

    kind = CONTINUOUS
    # This is only here for backwards-compatibility.
    # The return type is actually determined upon calling analyze()
    RETURN_TYPE = namedtuple("Sentiment", ["polarity", "subjectivity"])

    def analyze(self, text, keep_assessments=False):
        """Return the sentiment as a named tuple of the form:
        ``Sentiment(polarity, subjectivity, [assessments])``.

        :param text: The text to score; passed unchanged to the pattern
            sentiment function.
        :param bool keep_assessments: If ``True``, the returned tuple has a
            third field, ``assessments``, taken from the scoring result.
            If ``False`` (the default), only ``polarity`` and
            ``subjectivity`` are returned.
        :returns: A ``Sentiment`` named tuple with two or three fields,
            depending on ``keep_assessments``.
        """
        # Score the text exactly once. The result unpacks to
        # (polarity, subjectivity) and also exposes ``.assessments``, so
        # there is no need to run the analysis a second time.
        result = pattern_sentiment(text)

        #: Return type declaration
        if keep_assessments:
            Sentiment = namedtuple(
                "Sentiment", ["polarity", "subjectivity", "assessments"]
            )
            polarity, subjectivity = result
            return Sentiment(polarity, subjectivity, result.assessments)

        Sentiment = namedtuple("Sentiment", ["polarity", "subjectivity"])
        return Sentiment(*result)




def _default_feature_extractor(words):
    """Default feature extractor for the NaiveBayesAnalyzer."""
    return dict((word, True) for word in words)



[docs]
class NaiveBayesAnalyzer(BaseSentimentAnalyzer):
    """Naive Bayes analyzer that is trained on a dataset of movie reviews.
    Returns results as a named tuple of the form:
    ``Sentiment(classification, p_pos, p_neg)``

    :param callable feature_extractor: Function that returns a dictionary of
        features, given a list of words.
    """

    kind = DISCRETE
    #: Return type declaration
    RETURN_TYPE = namedtuple("Sentiment", ["classification", "p_pos", "p_neg"])

    def __init__(self, feature_extractor=_default_feature_extractor):
        super().__init__()
        # Set by train(); stays None until the classifier has been trained.
        self._classifier = None
        self.feature_extractor = feature_extractor

    @requires_nltk_corpus
    def train(self):
        """Train the Naive Bayes classifier on the movie review corpus.

        Builds one ``(features, label)`` pair per review file, all "neg"
        reviews first and then all "pos" reviews, and stores the trained
        classifier on ``self._classifier``.
        """
        super().train()
        corpus = nltk.corpus.movie_reviews
        # One labeled feature set per file; label order (neg, then pos)
        # matches the order the training data has always been built in.
        train_data = [
            (self.feature_extractor(corpus.words(fileids=[fileid])), label)
            for label in ("neg", "pos")
            for fileid in corpus.fileids(label)
        ]
        self._classifier = nltk.classify.NaiveBayesClassifier.train(train_data)

    def analyze(self, text):
        """Return the sentiment as a named tuple of the form:
        ``Sentiment(classification, p_pos, p_neg)``

        :param text: The text to classify. It is tokenized without
            punctuation, lowercased, and tokens shorter than three
            characters are discarded before feature extraction.
        :returns: ``RETURN_TYPE`` holding the most probable label and the
            probabilities of the "pos" and "neg" labels.
        """
        # Lazily train the classifier
        super().analyze(text)
        tokens = word_tokenize(text, include_punc=False)
        # Hand the extractor a real list, as its documented contract says:
        # a generator would be exhausted after one pass and would break
        # extractors that call len(), index, or iterate more than once.
        filtered = [t.lower() for t in tokens if len(t) >= 3]
        feats = self.feature_extractor(filtered)
        prob_dist = self._classifier.prob_classify(feats)
        return self.RETURN_TYPE(
            classification=prob_dist.max(),
            p_pos=prob_dist.prob("pos"),
            p_neg=prob_dist.prob("neg"),
        )
Source code for textblob.en.sentiments

Useful Links

Related Topics