Commit 3e2b8bae authored by J. Fernando Sánchez's avatar J. Fernando Sánchez

Last batch of big changes

* Add Box plugin (i.e. black box)
* Add SentimentBox, EmotionBox and MappingMixin
* Refactored CustomDict
parent 21a5a3f2
This is a collection of plugins that exemplify certain aspects of plugin development with senpy.
In ascending order of complexity, there are:
* Basic: a very basic analysis that does sentiment analysis based on emojis.
* Configurable: a version of `basic` with a configurable map of emojis for each sentiment.
* Parameterized: like `basic_info`, but users set the map in each query (via `extra_parameters`).
The first series of plugins the `basic` ones.
Their starting point is a classification function defined in `basic.py`.
They all include testing and running them as a script will run all tests.
In ascending order of customization, the plugins are:
* Basic is the simplest plugin of all. It leverages the `SentimentBox` Plugin class to create a plugin out of a classification method, and `MappingMixin` to convert the labels from (`pos`, `neg`) to (`marl:Positive`, `marl:Negative`
* Basic_box is just like the previous one, but replaces the mixin with a custom function.
* Basic_configurable is a version of `basic` with a configurable map of emojis for each sentiment.
* Basic_parameterized like `basic_info`, but users set the map in each query (via `extra_parameters`).
* Basic_analyse\_entry uses the more general `analyse_entry` method and adds the annotations individually.
In rest of the plugins show advanced topics:
* mynoop: shows how to add a definition file with external requirements for a plugin. Doing this with a python-only module would require moving all imports of the requirements to their functions, which is considered bad practice.
* Async: a barebones example of training a plugin and analyzing data in parallel.
......
......@@ -2,13 +2,13 @@
# coding: utf-8
emoticons = {
'marl:Positive': [':)', ':]', '=)', ':D'],
'marl:Negative': [':(', ':[', '=(']
'pos': [':)', ':]', '=)', ':D'],
'neg': [':(', ':[', '=(']
}
emojis = {
'marl:Positive': ['😁', '😂', '😃', '😄', '😆', '😅', '😄' '😍'],
'marl:Negative': ['😢', '😡', '😠', '😞', '😖', '😔', '😓', '😒']
'pos': ['😁', '😂', '😃', '😄', '😆', '😅', '😄' '😍'],
'neg': ['😢', '😡', '😠', '😞', '😖', '😔', '😓', '😒']
}
......
#!/usr/local/bin/python
# coding: utf-8
from senpy import easy_test, models, plugins
import basic
class BasicAnalyseEntry(plugins.SentimentPlugin):
'''Equivalent to Basic, implementing the analyse_entry method'''
author = '@balkian'
version = '0.1'
mappings = {
'pos': 'marl:Positive',
'neg': 'marl:Negative',
'default': 'marl:Neutral'
}
def analyse_entry(self, entry, params):
polarity = basic.get_polarity(entry.text)
polarity = self.mappings.get(polarity, self.mappings['default'])
s = models.Sentiment(marl__hasPolarity=polarity)
s.prov(self)
entry.sentiments.append(s)
yield entry
test_cases = [{
'input': 'Hello :)',
'polarity': 'marl:Positive'
}, {
'input': 'So sad :(',
'polarity': 'marl:Negative'
}, {
'input': 'Yay! Emojis 😁',
'polarity': 'marl:Positive'
}, {
'input': 'But no emoticons 😢',
'polarity': 'marl:Negative'
}]
if __name__ == '__main__':
easy_test()
#!/usr/local/bin/python
# coding: utf-8
from senpy import easy_test, SentimentBox
import basic
class BasicBox(SentimentBox):
''' A modified version of Basic that also does converts annotations manually'''
author = '@balkian'
version = '0.1'
mappings = {
'pos': 'marl:Positive',
'neg': 'marl:Negative',
'default': 'marl:Neutral'
}
def box(self, input, **kwargs):
output = basic.get_polarity(input)
return self.mappings.get(output, self.mappings['default'])
test_cases = [{
'input': 'Hello :)',
'polarity': 'marl:Positive'
}, {
'input': 'So sad :(',
'polarity': 'marl:Negative'
}, {
'input': 'Yay! Emojis 😁',
'polarity': 'marl:Positive'
}, {
'input': 'But no emoticons 😢',
'polarity': 'marl:Negative'
}]
if __name__ == '__main__':
easy_test()
#!/usr/local/bin/python
# coding: utf-8
from senpy import easy_test, models, plugins
from senpy import easy_test, SentimentBox, MappingMixin
import basic
class Basic(plugins.SentimentPlugin):
class Basic(MappingMixin, SentimentBox):
'''Provides sentiment annotation using a lexicon'''
author = '@balkian'
version = '0.1'
def analyse_entry(self, entry, params):
mappings = {
'pos': 'marl:Positive',
'neg': 'marl:Negative',
'default': 'marl:Neutral'
}
polarity = basic.get_polarity(entry.text)
s = models.Sentiment(marl__hasPolarity=polarity)
s.prov(self)
entry.sentiments.append(s)
yield entry
def box(self, input, **kwargs):
return basic.get_polarity(input)
test_cases = [{
'input': 'Hello :)',
......
......@@ -14,8 +14,12 @@ class Dictionary(plugins.SentimentPlugin):
dictionaries = [basic.emojis, basic.emoticons]
mappings = {'pos': 'marl:Positive', 'neg': 'marl:Negative'}
def analyse_entry(self, entry, params):
polarity = basic.get_polarity(entry.text, self.dictionaries)
if polarity in self.mappings:
polarity = self.mappings[polarity]
s = models.Sentiment(marl__hasPolarity=polarity)
s.prov(self)
......@@ -80,14 +84,14 @@ class Salutes(Dictionary):
'''Sentiment annotation with a custom lexicon, for illustration purposes'''
dictionaries = [{
'marl:Positive': ['Hello', '!'],
'marl:Negative': ['sad', ]
'marl:Negative': ['Good bye', ]
}]
test_cases = [{
'input': 'Hello :)',
'polarity': 'marl:Positive'
}, {
'input': 'So sad :(',
'input': 'Good bye :(',
'polarity': 'marl:Negative'
}, {
'input': 'Yay! Emojis 😁',
......
......@@ -7,8 +7,8 @@ import basic
class ParameterizedDictionary(plugins.SentimentPlugin):
'''This is a basic self-contained plugin'''
description = 'This is a basic self-contained plugin'
author = '@balkian'
version = '0.2'
......
......@@ -46,9 +46,9 @@ def main():
'''
try:
res = main_function(sys.argv[1:])
print(res.to_JSON())
print(res.serialize())
except Error as err:
print(err.to_JSON())
print(err.serialize())
sys.exit(2)
......
......@@ -8,6 +8,7 @@ import inspect
import copy
from abc import ABCMeta
from collections import MutableMapping, namedtuple
class BaseMeta(ABCMeta):
......@@ -31,24 +32,31 @@ class BaseMeta(ABCMeta):
_subtypes = {}
def __new__(mcs, name, bases, attrs, **kwargs):
defaults = {}
register_afterwards = False
defaults = {}
attrs = mcs.expand_with_schema(name, attrs)
if 'schema' in attrs:
register_afterwards = True
defaults = mcs.get_defaults(attrs['schema'])
for b in bases:
if hasattr(b, '_defaults'):
defaults.update(b._defaults)
info, attrs = mcs.split_attrs(attrs)
defaults.update(info)
attrs['_defaults'] = defaults
for base in bases:
if hasattr(base, '_defaults'):
defaults.update(getattr(base, '_defaults'))
info, rest = mcs.split_attrs(attrs)
for i in list(info.keys()):
if isinstance(info[i], _Alias):
fget, fset, fdel = make_property(info[i].indict)
rest[i] = property(fget=fget, fset=fset, fdel=fdel)
else:
defaults[i] = info[i]
rest['_defaults'] = defaults
cls = super(BaseMeta, mcs).__new__(mcs, name, tuple(bases), attrs)
cls = super(BaseMeta, mcs).__new__(mcs, name, tuple(bases), rest)
if register_afterwards:
mcs.register(cls, cls._defaults['@type'])
mcs.register(cls, defaults['@type'])
return cls
@classmethod
......@@ -81,17 +89,26 @@ class BaseMeta(ABCMeta):
attrs['_schema_file'] = schema_file
attrs['schema'] = schema
attrs['_validator'] = jsonschema.Draft4Validator(schema, resolver=resolver)
schema_defaults = BaseMeta.get_defaults(attrs['schema'])
attrs.update(schema_defaults)
return attrs
@staticmethod
def is_attr(k, v):
return (not(inspect.isroutine(v) or
inspect.ismethod(v) or
inspect.ismodule(v) or
isinstance(v, property)) and
k[0] != '_' and
k != 'schema' and
k != 'data')
def is_func(v):
return inspect.isroutine(v) or inspect.ismethod(v) or \
inspect.ismodule(v) or isinstance(v, property)
@staticmethod
def is_internal(k):
return k[0] == '_' or k == 'schema' or k == 'data'
@staticmethod
def get_key(key):
if key[0] != '_':
key = key.replace("__", ":", 1)
return key
@staticmethod
def split_attrs(attrs):
......@@ -102,15 +119,13 @@ class BaseMeta(ABCMeta):
e.g.:
'''
isattr = {}
notattr = {}
rest = {}
for key, value in attrs.items():
if BaseMeta.is_attr(key, value):
if key[0] != '_':
key = key.replace("__", ":", 1)
isattr[key] = copy.deepcopy(value)
if not (BaseMeta.is_internal(key)) and (not BaseMeta.is_func(value)):
isattr[key] = value
else:
notattr[key] = value
return isattr, notattr
rest[key] = value
return isattr, rest
@staticmethod
def get_defaults(schema):
......@@ -120,5 +135,123 @@ class BaseMeta(ABCMeta):
] + schema.get('allOf', []):
for k, v in obj.get('properties', {}).items():
if 'default' in v and k not in temp:
temp[k] = copy.deepcopy(v['default'])
temp[k] = v['default']
return temp
def make_property(key):
def fget(self):
return self[key]
def fdel(self):
del self[key]
def fset(self, value):
self[key] = value
return fget, fset, fdel
class CustomDict(MutableMapping, object):
'''
A dictionary whose elements can also be accessed as attributes. Since some
characters are not valid in the dot-notation, the attribute names also
converted. e.g.:
> d = CustomDict()
> d.key = d['ns:name'] = 1
> d.key == d['key']
True
> d.ns__name == d['ns:name']
'''
_defaults = {}
_map_attr_key = {'id': '@id'}
def __init__(self, *args, **kwargs):
super(CustomDict, self).__init__()
for k, v in self._defaults.items():
self[k] = copy.copy(v)
for arg in args:
self.update(arg)
for k, v in kwargs.items():
self[self._attr_to_key(k)] = v
return self
def serializable(self):
def ser_or_down(item):
if hasattr(item, 'serializable'):
return item.serializable()
elif isinstance(item, dict):
temp = dict()
for kp in item:
vp = item[kp]
temp[kp] = ser_or_down(vp)
return temp
elif isinstance(item, list) or isinstance(item, set):
return list(ser_or_down(i) for i in item)
else:
return item
return ser_or_down(self.as_dict())
def __getitem__(self, key):
key = self._key_to_attr(key)
return self.__dict__[key]
def __setitem__(self, key, value):
'''Do not insert data directly, there might be a property in that key. '''
key = self._key_to_attr(key)
return setattr(self, key, value)
def as_dict(self):
return {self._attr_to_key(k): v for k, v in self.__dict__.items()
if not self._internal_key(k)}
def __iter__(self):
return (k for k in self.__dict__ if not self._internal_key(k))
def __len__(self):
return len(self.__dict__)
def __delitem__(self, key):
del self.__dict__[key]
def update(self, other):
for k, v in other.items():
self[k] = v
def _attr_to_key(self, key):
key = key.replace("__", ":", 1)
key = self._map_attr_key.get(key, key)
return key
def _key_to_attr(self, key):
if self._internal_key(key):
return key
key = key.replace(":", "__", 1)
return key
def __getattr__(self, key):
try:
return self.__dict__[self._attr_to_key(key)]
except KeyError:
raise AttributeError
@staticmethod
def _internal_key(key):
return key[0] == '_'
def __str__(self):
return str(self.serializable())
def __repr__(self):
return str(self.serializable())
_Alias = namedtuple('Alias', 'indict')
def alias(key):
return _Alias(key)
......@@ -17,8 +17,6 @@ import copy
import json
import os
import jsonref
from collections import UserDict
from flask import Response as FlaskResponse
from pyld import jsonld
......@@ -30,7 +28,7 @@ logger = logging.getLogger(__name__)
from rdflib import Graph
from .meta import BaseMeta
from .meta import BaseMeta, CustomDict, alias
DEFINITIONS_FILE = 'definitions.json'
CONTEXT_PATH = os.path.join(
......@@ -81,67 +79,6 @@ def register(rsubclass, rtype=None):
BaseMeta.register(rsubclass, rtype)
class CustomDict(UserDict, object):
'''
A dictionary whose elements can also be accessed as attributes. Since some
characters are not valid in the dot-notation, the attribute names also
converted. e.g.:
> d = CustomDict()
> d.key = d['ns:name'] = 1
> d.key == d['key']
True
> d.ns__name == d['ns:name']
'''
_defaults = []
def __init__(self, *args, **kwargs):
temp = copy.deepcopy(self._defaults)
for arg in args:
temp.update(copy.deepcopy(arg))
for k, v in kwargs.items():
temp[self._get_key(k)] = v
super(CustomDict, self).__init__(temp)
@staticmethod
def _get_key(key):
if key is 'id':
key = '@id'
key = key.replace("__", ":", 1)
return key
@staticmethod
def _internal_key(key):
return key[0] == '_' or key == 'data'
def __getattr__(self, key):
'''
__getattr__ only gets called when the attribute could not be found
in the __dict__. So we only need to look for the the element in the
dictionary, or raise an Exception.
'''
mkey = self._get_key(key)
if not self._internal_key(key) and mkey in self:
return self[mkey]
raise AttributeError(key)
def __setattr__(self, key, value):
# Work as usual for internal properties or already existing
# properties
if self._internal_key(key) or key in self.__dict__:
return super(CustomDict, self).__setattr__(key, value)
key = self._get_key(key)
return self.__setitem__(self._get_key(key), value)
def __delattr__(self, key):
if self._internal_key(key):
return object.__delattr__(self, key)
key = self._get_key(key)
self.__delitem__(self._get_key(key))
class BaseModel(with_metaclass(BaseMeta, CustomDict)):
'''
Entities of the base model are a special kind of dictionary that emulates
......@@ -185,14 +122,25 @@ class BaseModel(with_metaclass(BaseMeta, CustomDict)):
def __init__(self, *args, **kwargs):
auto_id = kwargs.pop('_auto_id', True)
super(BaseModel, self).__init__(*args, **kwargs)
if '@id' not in self and auto_id:
self.id = ':{}_{}'.format(type(self).__name__, time.time())
if auto_id:
self.id
if '@type' not in self:
logger.warn('Created an instance of an unknown model')
@property
def id(self):
if '@id' not in self:
self['@id'] = ':{}_{}'.format(type(self).__name__, time.time())
return self['@id']
@id.setter
def id(self, value):
self['@id'] = value
def flask(self,
in_headers=True,
headers=None,
......@@ -246,23 +194,6 @@ class BaseModel(with_metaclass(BaseMeta, CustomDict)):
else:
return content
def serializable(self):
def ser_or_down(item):
if hasattr(item, 'serializable'):
return item.serializable()
elif isinstance(item, dict):
temp = dict()
for kp in item:
vp = item[kp]
temp[kp] = ser_or_down(vp)
return temp
elif isinstance(item, list) or isinstance(item, set):
return list(ser_or_down(i) for i in item)
else:
return item
return ser_or_down(self.data)
def jsonld(self,
with_context=False,
context_uri=None,
......@@ -288,10 +219,6 @@ class BaseModel(with_metaclass(BaseMeta, CustomDict)):
del result['@context']
return result
def to_JSON(self, *args, **kwargs):
js = json.dumps(self.jsonld(*args, **kwargs), indent=4, sort_keys=True)
return js
def validate(self, obj=None):
if not obj:
obj = self
......@@ -299,9 +226,6 @@ class BaseModel(with_metaclass(BaseMeta, CustomDict)):
obj = obj.jsonld()
self._validator.validate(obj)
def __str__(self):
return str(self.serialize())
def prov(self, another):
self['prov:wasGeneratedBy'] = another.id
......@@ -329,7 +253,7 @@ def from_dict(indict, cls=None):
for ix, v2 in enumerate(v):
if isinstance(v2, dict):
v[ix] = from_dict(v2)
outdict[k] = copy.deepcopy(v)
outdict[k] = copy.copy(v)
return cls(**outdict)
......@@ -342,22 +266,23 @@ def from_json(injson):
return from_dict(indict)
class Entry(BaseModel, Exception):
class Entry(BaseModel):
schema = 'entry'
@property
def text(self):
return self['nif:isString']
text = alias('nif:isString')
class Sentiment(BaseModel):
schema = 'sentiment'
@text.setter
def text(self, value):
self['nif:isString'] = value
polarity = alias('marl:hasPolarity')
polarityValue = alias('marl:hasPolarityValue')
class Error(BaseModel, Exception):
schema = 'error'
def __init__(self, message, *args, **kwargs):
def __init__(self, message='Generic senpy exception', *args, **kwargs):
Exception.__init__(self, message)
super(Error, self).__init__(*args, **kwargs)
self.message = message
......@@ -407,7 +332,6 @@ for i in [
'plugins',
'response',
'results',
'sentiment',
'sentimentPlugin',
'suggestion',
]:
......
from future import standard_library
standard_library.install_aliases()
from future.utils import with_metaclass
import os.path
......@@ -120,21 +122,20 @@ class Plugin(with_metaclass(PluginMeta, models.Plugin)):
entry = models.Entry(case['entry'])
given_parameters = case.get('params', case.get('parameters', {}))
expected = case['expected']
should_fail = case.get('should_fail', False)
try:
params = api.parse_params(given_parameters, self.extra_params)
res = list(self.analyse_entry(entry, params))
res = list(self.analyse_entries([entry, ], params))
if not isinstance(expected, list):
expected = [expected]
utils.check_template(res, expected)
for r in res:
r.validate()
except models.Error:
if not expected:
if should_fail:
return
raise
if not expected:
raise Exception('This test should have raised an exception.')
if not isinstance(expected, list):
expected = [expected]
utils.check_template(res, expected)
for r in res:
r.validate()
assert not should_fail
def open(self, fpath, *args, **kwargs):
if not os.path.isabs(fpath):
......@@ -241,6 +242,92 @@ class EmotionConversion(Conversion):
EmotionConversionPlugin = EmotionConversion
class Box(AnalysisPlugin):
'''
Black box plugins delegate analysis to a function.
The flow is like so:
.. code-block::
entry --> input() --> box() --> output() --> entry'
In other words: their ``input`` method convers a query (entry and a set of parameters) into
the input to the box method. The ``output`` method convers the results given by the box into
an entry that senpy can handle.
'''
def input(self, entry, params=None):
'''Transforms a query (entry+param) into an input for the black box'''
return entry
def output(self, output, entry=None, params=None):
'''Transforms the results of the black box into an entry'''
return output
def box(self):
raise NotImplementedError('You should define the behavior of this plugin')
def analyse_entries(self, entries, params):
for entry in entries:
input = self.input(entry=entry, params=params)
results = self.box(input=input, params=params)
yield self.output(output=results, entry=entry, params=params)
class TextBox(Box):
'''A black box plugin that takes only text as input'''
def input(self, entry, params):
entry = s