Commit 3e2b8bae authored by J. Fernando Sánchez's avatar J. Fernando Sánchez

Last batch of big changes

* Add Box plugin (i.e. black box)
* Add SentimentBox, EmotionBox and MappingMixin
* Refactored CustomDict
parent 21a5a3f2
This is a collection of plugins that exemplify certain aspects of plugin development with senpy. This is a collection of plugins that exemplify certain aspects of plugin development with senpy.
In ascending order of complexity, there are:
* Basic: a very basic analysis that does sentiment analysis based on emojis. The first series of plugins are the `basic` ones.
* Configurable: a version of `basic` with a configurable map of emojis for each sentiment. Their starting point is a classification function defined in `basic.py`.
* Parameterized: like `basic_info`, but users set the map in each query (via `extra_parameters`). They all include tests, and running them as a script will run all tests.
In ascending order of customization, the plugins are:
* Basic is the simplest plugin of all. It leverages the `SentimentBox` Plugin class to create a plugin out of a classification method, and `MappingMixin` to convert the labels from (`pos`, `neg`) to (`marl:Positive`, `marl:Negative`).
* Basic_box is just like the previous one, but replaces the mixin with a custom function.
* Basic_configurable is a version of `basic` with a configurable map of emojis for each sentiment.
* Basic_parameterized is like `basic_info`, but users set the map in each query (via `extra_parameters`).
* Basic_analyse\_entry uses the more general `analyse_entry` method and adds the annotations individually.
The rest of the plugins show advanced topics:
* mynoop: shows how to add a definition file with external requirements for a plugin. Doing this with a python-only module would require moving all imports of the requirements to their functions, which is considered bad practice. * mynoop: shows how to add a definition file with external requirements for a plugin. Doing this with a python-only module would require moving all imports of the requirements to their functions, which is considered bad practice.
* Async: a barebones example of training a plugin and analyzing data in parallel. * Async: a barebones example of training a plugin and analyzing data in parallel.
......
...@@ -2,13 +2,13 @@ ...@@ -2,13 +2,13 @@
# coding: utf-8 # coding: utf-8
# Lexicons used by the example plugins: each maps a raw polarity label
# ('pos' / 'neg') to the list of tokens that signal that polarity.
emoticons = {
    'pos': [':)', ':]', '=)', ':D'],
    'neg': [':(', ':[', '=(']
}

# NOTE: every entry must be separated by a comma. Two adjacent string
# literals are silently concatenated by Python, which previously turned the
# last two emojis into the single (never matching) token '😄😍'.
emojis = {
    'pos': ['😁', '😂', '😃', '😄', '😆', '😅', '😄', '😍'],
    'neg': ['😢', '😡', '😠', '😞', '😖', '😔', '😓', '😒']
}
......
#!/usr/local/bin/python
# coding: utf-8
from senpy import easy_test, models, plugins
import basic
class BasicAnalyseEntry(plugins.SentimentPlugin):
    '''Equivalent to Basic, implementing the analyse_entry method'''

    author = '@balkian'
    version = '0.1'

    # Translation table from the label produced by `basic.get_polarity` to
    # the polarity value stored in the annotation. 'default' is the fallback
    # used when the label has no entry of its own.
    mappings = {
        'pos': 'marl:Positive',
        'neg': 'marl:Negative',
        'default': 'marl:Neutral'
    }

    def analyse_entry(self, entry, params):
        # Classify the text, then translate the label through `mappings`.
        label = basic.get_polarity(entry.text)
        fallback = self.mappings['default']
        marl_polarity = self.mappings.get(label, fallback)

        # Build the annotation, attach this plugin as its provenance and
        # add it to the entry before handing the entry back to the caller.
        sentiment = models.Sentiment(marl__hasPolarity=marl_polarity)
        sentiment.prov(self)
        entry.sentiments.append(sentiment)
        yield entry

    # Each case pairs an input text with the polarity expected for it.
    test_cases = [
        {'input': 'Hello :)', 'polarity': 'marl:Positive'},
        {'input': 'So sad :(', 'polarity': 'marl:Negative'},
        {'input': 'Yay! Emojis 😁', 'polarity': 'marl:Positive'},
        {'input': 'But no emoticons 😢', 'polarity': 'marl:Negative'},
    ]
if __name__ == '__main__':
easy_test()
#!/usr/local/bin/python
# coding: utf-8
from senpy import easy_test, SentimentBox
import basic
class BasicBox(SentimentBox):
    '''A modified version of Basic that also converts annotations manually'''

    author = '@balkian'
    version = '0.1'

    # Translation table from the label produced by `basic.get_polarity` to
    # the polarity value this plugin reports. 'default' is the fallback used
    # when the label has no entry of its own.
    mappings = {
        'pos': 'marl:Positive',
        'neg': 'marl:Negative',
        'default': 'marl:Neutral'
    }

    def box(self, input, **kwargs):
        # `input` shadows the builtin, but the parameter name is kept so
        # that callers passing it by keyword keep working.
        label = basic.get_polarity(input)
        # The conversion is done here by hand, with an explicit fallback.
        return self.mappings.get(label, self.mappings['default'])

    # Each case pairs an input text with the polarity expected for it.
    test_cases = [{
        'input': 'Hello :)',
        'polarity': 'marl:Positive'
    }, {
        'input': 'So sad :(',
        'polarity': 'marl:Negative'
    }, {
        'input': 'Yay! Emojis 😁',
        'polarity': 'marl:Positive'
    }, {
        'input': 'But no emoticons 😢',
        'polarity': 'marl:Negative'
    }]
if __name__ == '__main__':
easy_test()
#!/usr/local/bin/python #!/usr/local/bin/python
# coding: utf-8 # coding: utf-8
from senpy import easy_test, models, plugins from senpy import easy_test, SentimentBox, MappingMixin
import basic import basic
class Basic(plugins.SentimentPlugin): class Basic(MappingMixin, SentimentBox):
'''Provides sentiment annotation using a lexicon''' '''Provides sentiment annotation using a lexicon'''
author = '@balkian' author = '@balkian'
version = '0.1' version = '0.1'
def analyse_entry(self, entry, params): mappings = {
'pos': 'marl:Positive',
'neg': 'marl:Negative',
'default': 'marl:Neutral'
}
polarity = basic.get_polarity(entry.text) def box(self, input, **kwargs):
return basic.get_polarity(input)
s = models.Sentiment(marl__hasPolarity=polarity)
s.prov(self)
entry.sentiments.append(s)
yield entry
test_cases = [{ test_cases = [{
'input': 'Hello :)', 'input': 'Hello :)',
......
...@@ -14,8 +14,12 @@ class Dictionary(plugins.SentimentPlugin): ...@@ -14,8 +14,12 @@ class Dictionary(plugins.SentimentPlugin):
dictionaries = [basic.emojis, basic.emoticons] dictionaries = [basic.emojis, basic.emoticons]
mappings = {'pos': 'marl:Positive', 'neg': 'marl:Negative'}
def analyse_entry(self, entry, params): def analyse_entry(self, entry, params):
polarity = basic.get_polarity(entry.text, self.dictionaries) polarity = basic.get_polarity(entry.text, self.dictionaries)
if polarity in self.mappings:
polarity = self.mappings[polarity]
s = models.Sentiment(marl__hasPolarity=polarity) s = models.Sentiment(marl__hasPolarity=polarity)
s.prov(self) s.prov(self)
...@@ -80,14 +84,14 @@ class Salutes(Dictionary): ...@@ -80,14 +84,14 @@ class Salutes(Dictionary):
'''Sentiment annotation with a custom lexicon, for illustration purposes''' '''Sentiment annotation with a custom lexicon, for illustration purposes'''
dictionaries = [{ dictionaries = [{
'marl:Positive': ['Hello', '!'], 'marl:Positive': ['Hello', '!'],
'marl:Negative': ['sad', ] 'marl:Negative': ['Good bye', ]
}] }]
test_cases = [{ test_cases = [{
'input': 'Hello :)', 'input': 'Hello :)',
'polarity': 'marl:Positive' 'polarity': 'marl:Positive'
}, { }, {
'input': 'So sad :(', 'input': 'Good bye :(',
'polarity': 'marl:Negative' 'polarity': 'marl:Negative'
}, { }, {
'input': 'Yay! Emojis 😁', 'input': 'Yay! Emojis 😁',
......
...@@ -7,8 +7,8 @@ import basic ...@@ -7,8 +7,8 @@ import basic
class ParameterizedDictionary(plugins.SentimentPlugin): class ParameterizedDictionary(plugins.SentimentPlugin):
'''This is a basic self-contained plugin'''
description = 'This is a basic self-contained plugin'
author = '@balkian' author = '@balkian'
version = '0.2' version = '0.2'
......
...@@ -46,9 +46,9 @@ def main(): ...@@ -46,9 +46,9 @@ def main():
''' '''
try: try:
res = main_function(sys.argv[1:]) res = main_function(sys.argv[1:])
print(res.to_JSON()) print(res.serialize())
except Error as err: except Error as err:
print(err.to_JSON()) print(err.serialize())
sys.exit(2) sys.exit(2)
......
...@@ -8,6 +8,7 @@ import inspect ...@@ -8,6 +8,7 @@ import inspect
import copy import copy
from abc import ABCMeta from abc import ABCMeta
try:
    # The ABCs live in collections.abc; the aliases in `collections` were
    # removed in Python 3.10.
    from collections.abc import MutableMapping
except ImportError:  # Python 2
    from collections import MutableMapping
from collections import namedtuple
class BaseMeta(ABCMeta): class BaseMeta(ABCMeta):
...@@ -31,24 +32,31 @@ class BaseMeta(ABCMeta): ...@@ -31,24 +32,31 @@ class BaseMeta(ABCMeta):
_subtypes = {} _subtypes = {}
def __new__(mcs, name, bases, attrs, **kwargs): def __new__(mcs, name, bases, attrs, **kwargs):
defaults = {}
register_afterwards = False register_afterwards = False
defaults = {}
attrs = mcs.expand_with_schema(name, attrs) attrs = mcs.expand_with_schema(name, attrs)
if 'schema' in attrs: if 'schema' in attrs:
register_afterwards = True register_afterwards = True
defaults = mcs.get_defaults(attrs['schema']) for base in bases:
for b in bases: if hasattr(base, '_defaults'):
if hasattr(b, '_defaults'): defaults.update(getattr(base, '_defaults'))
defaults.update(b._defaults)
info, attrs = mcs.split_attrs(attrs) info, rest = mcs.split_attrs(attrs)
defaults.update(info)
attrs['_defaults'] = defaults for i in list(info.keys()):
if isinstance(info[i], _Alias):
fget, fset, fdel = make_property(info[i].indict)
rest[i] = property(fget=fget, fset=fset, fdel=fdel)
else:
defaults[i] = info[i]
rest['_defaults'] = defaults
cls = super(BaseMeta, mcs).__new__(mcs, name, tuple(bases), attrs) cls = super(BaseMeta, mcs).__new__(mcs, name, tuple(bases), rest)
if register_afterwards: if register_afterwards:
mcs.register(cls, cls._defaults['@type']) mcs.register(cls, defaults['@type'])
return cls return cls
@classmethod @classmethod
...@@ -81,17 +89,26 @@ class BaseMeta(ABCMeta): ...@@ -81,17 +89,26 @@ class BaseMeta(ABCMeta):
attrs['_schema_file'] = schema_file attrs['_schema_file'] = schema_file
attrs['schema'] = schema attrs['schema'] = schema
attrs['_validator'] = jsonschema.Draft4Validator(schema, resolver=resolver) attrs['_validator'] = jsonschema.Draft4Validator(schema, resolver=resolver)
schema_defaults = BaseMeta.get_defaults(attrs['schema'])
attrs.update(schema_defaults)
return attrs return attrs
@staticmethod @staticmethod
def is_attr(k, v): def is_func(v):
return (not(inspect.isroutine(v) or return inspect.isroutine(v) or inspect.ismethod(v) or \
inspect.ismethod(v) or inspect.ismodule(v) or isinstance(v, property)
inspect.ismodule(v) or
isinstance(v, property)) and @staticmethod
k[0] != '_' and def is_internal(k):
k != 'schema' and return k[0] == '_' or k == 'schema' or k == 'data'
k != 'data')
@staticmethod
def get_key(key):
if key[0] != '_':
key = key.replace("__", ":", 1)
return key
@staticmethod @staticmethod
def split_attrs(attrs): def split_attrs(attrs):
...@@ -102,15 +119,13 @@ class BaseMeta(ABCMeta): ...@@ -102,15 +119,13 @@ class BaseMeta(ABCMeta):
e.g.: e.g.:
''' '''
isattr = {} isattr = {}
notattr = {} rest = {}
for key, value in attrs.items(): for key, value in attrs.items():
if BaseMeta.is_attr(key, value): if not (BaseMeta.is_internal(key)) and (not BaseMeta.is_func(value)):
if key[0] != '_': isattr[key] = value
key = key.replace("__", ":", 1)
isattr[key] = copy.deepcopy(value)
else: else:
notattr[key] = value rest[key] = value
return isattr, notattr return isattr, rest
@staticmethod @staticmethod
def get_defaults(schema): def get_defaults(schema):
...@@ -120,5 +135,123 @@ class BaseMeta(ABCMeta): ...@@ -120,5 +135,123 @@ class BaseMeta(ABCMeta):
] + schema.get('allOf', []): ] + schema.get('allOf', []):
for k, v in obj.get('properties', {}).items(): for k, v in obj.get('properties', {}).items():
if 'default' in v and k not in temp: if 'default' in v and k not in temp:
temp[k] = copy.deepcopy(v['default']) temp[k] = v['default']
return temp return temp
def make_property(key):
    '''
    Build the three accessors of an attribute that proxies item `key`.

    Every accessor takes the owning object as its first argument and
    forwards to that object's item protocol. They are returned in the order
    (fget, fset, fdel).
    '''

    def fset(instance, value):
        instance[key] = value

    def fdel(instance):
        del instance[key]

    def fget(instance):
        return instance[key]

    return fget, fset, fdel
class CustomDict(MutableMapping, object):
    '''
    A dictionary whose elements can also be accessed as attributes. Since some
    characters are not valid in the dot-notation, the attribute names are
    also converted. e.g.:

    > d = CustomDict()
    > d.key = d['ns:name'] = 1
    > d.key == d['key']
    True
    > d.ns__name == d['ns:name']
    True

    Values live in the instance ``__dict__`` under their attribute name
    (the first ':' of a key is stored as '__'). Names starting with '_' are
    internal: they are excluded from iteration, ``len()`` and ``as_dict()``.
    '''

    # Initial items copied (shallowly) into every new instance.
    _defaults = {}
    # Attribute names that map to a differently spelled dictionary key.
    _map_attr_key = {'id': '@id'}

    def __init__(self, *args, **kwargs):
        '''
        Populate the mapping from the class defaults, then from every
        positional mapping (in order), then from the keyword arguments, whose
        names are converted from attribute form to key form.
        '''
        super(CustomDict, self).__init__()
        for k, v in self._defaults.items():
            self[k] = copy.copy(v)
        for arg in args:
            self.update(arg)
        for k, v in kwargs.items():
            self[self._attr_to_key(k)] = v
        # __init__ must return None: returning `self` makes direct
        # instantiation fail with a TypeError.

    def serializable(self):
        '''
        Return a plain structure for this object: nested objects exposing
        `serializable` are converted through it, dicts are converted value
        by value, and lists/sets become lists.
        '''
        def ser_or_down(item):
            if hasattr(item, 'serializable'):
                return item.serializable()
            elif isinstance(item, dict):
                return {kp: ser_or_down(vp) for kp, vp in item.items()}
            elif isinstance(item, (list, set)):
                return [ser_or_down(i) for i in item]
            else:
                return item

        return ser_or_down(self.as_dict())

    def __getitem__(self, key):
        key = self._key_to_attr(key)
        return self.__dict__[key]

    def __setitem__(self, key, value):
        '''Do not insert data directly, there might be a property in that key. '''
        key = self._key_to_attr(key)
        return setattr(self, key, value)

    def as_dict(self):
        '''Return the public items as a dict, with names in key form.'''
        return {self._attr_to_key(k): v for k, v in self.__dict__.items()
                if not self._internal_key(k)}

    def __iter__(self):
        return (k for k in self.__dict__ if not self._internal_key(k))

    def __len__(self):
        # Count exactly what __iter__ yields, so internal names are excluded
        # and len(d) == len(list(d)).
        return sum(1 for _ in self)

    def __delitem__(self, key):
        # Convert like __getitem__/__setitem__ do, so that a key such as
        # 'ns:name' (stored as 'ns__name') can actually be deleted.
        del self.__dict__[self._key_to_attr(key)]

    def update(self, other):
        '''Copy every item of the mapping `other` into this object.'''
        for k, v in other.items():
            self[k] = v

    def _attr_to_key(self, key):
        '''Convert an attribute name to key form: 'ns__name' -> 'ns:name'.'''
        key = key.replace("__", ":", 1)
        key = self._map_attr_key.get(key, key)
        return key

    def _key_to_attr(self, key):
        '''Convert a key to attribute form: 'ns:name' -> 'ns__name'.'''
        if self._internal_key(key):
            return key
        key = key.replace(":", "__", 1)
        return key

    def __getattr__(self, key):
        # Only reached when normal attribute lookup fails; this is what
        # resolves mapped names such as `id` -> '@id'.
        try:
            return self.__dict__[self._attr_to_key(key)]
        except KeyError:
            raise AttributeError(key)

    @staticmethod
    def _internal_key(key):
        # Slicing instead of indexing keeps an empty key from raising.
        return key[:1] == '_'

    def __str__(self):
        return str(self.serializable())

    def __repr__(self):
        return str(self.serializable())
# Marker type for a class attribute that is an alias of another item: its
# single field, `indict`, holds the key the alias points to.
_Alias = namedtuple('Alias', 'indict')


def alias(key):
    '''Return an alias marker that points to the item stored under `key`.'''
    return _Alias(indict=key)
...@@ -17,8 +17,6 @@ import copy ...@@ -17,8 +17,6 @@ import copy
import json import json
import os import os
import jsonref import jsonref
from collections import UserDict
from flask import Response as FlaskResponse from flask import Response as FlaskResponse
from pyld import jsonld from pyld import jsonld
...@@ -30,7 +28,7 @@ logger = logging.getLogger(__name__) ...@@ -30,7 +28,7 @@ logger = logging.getLogger(__name__)
from rdflib import Graph from rdflib import Graph
from .meta import BaseMeta from .meta import BaseMeta, CustomDict, alias
DEFINITIONS_FILE = 'definitions.json' DEFINITIONS_FILE = 'definitions.json'
CONTEXT_PATH = os.path.join( CONTEXT_PATH = os.path.join(
...@@ -81,67 +79,6 @@ def register(rsubclass, rtype=None): ...@@ -81,67 +79,6 @@ def register(rsubclass, rtype=None):
BaseMeta.register(rsubclass, rtype) BaseMeta.register(rsubclass, rtype)
class CustomDict(UserDict, object):
    '''
    A dictionary whose elements can also be accessed as attributes. Since some
    characters are not valid in the dot-notation, the attribute names are
    also converted. e.g.:

    > d = CustomDict()
    > d.key = d['ns:name'] = 1
    > d.key == d['key']
    True
    > d.ns__name == d['ns:name']
    True
    '''

    # Initial items, deep-copied into every new instance. This must be a
    # mapping: with a list, `temp.update(...)` and `temp[key] = ...` below
    # fail as soon as any argument is passed to the constructor.
    _defaults = {}

    def __init__(self, *args, **kwargs):
        '''
        Build the initial content from the class defaults, then from every
        positional mapping (deep-copied), then from the keyword arguments,
        whose names are converted from attribute form to key form.
        '''
        temp = copy.deepcopy(self._defaults)
        for arg in args:
            temp.update(copy.deepcopy(arg))
        for k, v in kwargs.items():
            temp[self._get_key(k)] = v
        super(CustomDict, self).__init__(temp)

    @staticmethod
    def _get_key(key):
        '''Convert an attribute name to its key: 'id' -> '@id', 'a__b' -> 'a:b'.'''
        # Compare by value; `is` only works by accident of string interning.
        if key == 'id':
            key = '@id'
        key = key.replace("__", ":", 1)
        return key

    @staticmethod
    def _internal_key(key):
        # 'data' is the attribute UserDict keeps its underlying dict in.
        return key[0] == '_' or key == 'data'

    def __getattr__(self, key):
        '''
        __getattr__ only gets called when the attribute could not be found
        in the __dict__. So we only need to look for the element in the
        dictionary, or raise an Exception.
        '''
        mkey = self._get_key(key)
        if not self._internal_key(key) and mkey in self:
            return self[mkey]
        raise AttributeError(key)

    def __setattr__(self, key, value):
        # Work as usual for internal properties or already existing
        # properties
        if self._internal_key(key) or key in self.__dict__:
            return super(CustomDict, self).__setattr__(key, value)
        # Convert exactly once, like __getattr__ does, so that a name that
        # contains '__' twice is read back from the key it was written to.
        return self.__setitem__(self._get_key(key), value)

    def __delattr__(self, key):
        if self._internal_key(key):
            return object.__delattr__(self, key)
        # Single conversion, consistent with __getattr__ and __setattr__.
        self.__delitem__(self._get_key(key))
class BaseModel(with_metaclass(BaseMeta, CustomDict)): class BaseModel(with_metaclass(BaseMeta, CustomDict)):
''' '''
Entities of the base model are a special kind of dictionary that emulates Entities of the base model are a special kind of dictionary that emulates
...@@ -185,14 +122,25 @@ class BaseModel(with_metaclass(BaseMeta, CustomDict)): ...@@ -185,14 +122,25 @@ class BaseModel(with_metaclass(BaseMeta, CustomDict)):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
auto_id = kwargs.pop('_auto_id', True) auto_id = kwargs.pop('_auto_id', True)
super(BaseModel, self).__init__(*args, **kwargs) super(BaseModel, self).__init__(*args, **kwargs)
if '@id' not in self and auto_id: if auto_id:
self.id = ':{}_{}'.format(type(self).__name__, time.time()) self.id
if '@type' not in self: if '@type' not in self:
logger.warn('Created an instance of an unknown model') logger.warn('Created an instance of an unknown model')