Commit 41aa142c authored by J. Fernando Sánchez
Browse files

Refactored conversion and postprocessing

parent b4873013
...@@ -3,10 +3,8 @@ from .models import Error, Results, Entry, from_string ...@@ -3,10 +3,8 @@ from .models import Error, Results, Entry, from_string
import logging import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
boolean = [True, False] boolean = [True, False]
API_PARAMS = { API_PARAMS = {
"algorithm": { "algorithm": {
"aliases": ["algorithms", "a", "algo"], "aliases": ["algorithms", "a", "algo"],
...@@ -140,6 +138,15 @@ NIF_PARAMS = { ...@@ -140,6 +138,15 @@ NIF_PARAMS = {
} }
} }
# Flat union of every built-in parameter spec, keyed by parameter name.
# parse_extra_params() consults it to tell built-in parameters apart from
# plugin-specific ones. Specs are merged in order, so when two specs declare
# the same name the later spec in the tuple wins (API_PARAMS is applied last).
BUILTIN_PARAMS = {}
for _spec in (NIF_PARAMS, CLI_PARAMS, WEB_PARAMS, PLUGINS_PARAMS,
              EVAL_PARAMS, API_PARAMS):
    BUILTIN_PARAMS.update(_spec)
def parse_params(indict, *specs): def parse_params(indict, *specs):
if not specs: if not specs:
...@@ -164,7 +171,7 @@ def parse_params(indict, *specs): ...@@ -164,7 +171,7 @@ def parse_params(indict, *specs):
continue continue
if "options" in options: if "options" in options:
if options["options"] == boolean: if options["options"] == boolean:
outdict[param] = outdict[param] in [None, True, 'true', '1'] outdict[param] = str(outdict[param]).lower() in ['true', '1']
elif outdict[param] not in options["options"]: elif outdict[param] not in options["options"]:
wrong_params[param] = spec[param] wrong_params[param] = spec[param]
if wrong_params: if wrong_params:
...@@ -180,11 +187,19 @@ def parse_params(indict, *specs): ...@@ -180,11 +187,19 @@ def parse_params(indict, *specs):
return outdict return outdict
def parse_extra_params(request, plugin=None): def parse_extra_params(request, plugins=None):
plugins = plugins or []
params = request.parameters.copy() params = request.parameters.copy()
if plugin: for plugin in plugins:
extra_params = parse_params(params, plugin.get('extra_params', {})) if plugin:
params.update(extra_params) extra_params = parse_params(params, plugin.get('extra_params', {}))
for k, v in extra_params.items():
if k not in BUILTIN_PARAMS:
if k in params: # Set by another plugin
del params[k]
else:
params[k] = v
params['{}.{}'.format(plugin.name, k)] = v
return params return params
...@@ -194,12 +209,12 @@ def parse_call(params): ...@@ -194,12 +209,12 @@ def parse_call(params):
params = parse_params(params, NIF_PARAMS) params = parse_params(params, NIF_PARAMS)
if params['informat'] == 'text': if params['informat'] == 'text':
results = Results() results = Results()
entry = Entry(nif__isString=params['input'], entry = Entry(nif__isString=params['input'], id='#') # Use @base
id='#') # Use @base
results.entries.append(entry) results.entries.append(entry)
elif params['informat'] == 'json-ld': elif params['informat'] == 'json-ld':
results = from_string(params['input'], cls=Results) results = from_string(params['input'], cls=Results)
else: # pragma: no cover else: # pragma: no cover
raise NotImplementedError('Informat {} is not implemented'.format(params['informat'])) raise NotImplementedError('Informat {} is not implemented'.format(
params['informat']))
results.parameters = params results.parameters = params
return results return results
...@@ -197,7 +197,9 @@ def api_root(plugin): ...@@ -197,7 +197,9 @@ def api_root(plugin):
plugin = plugin.replace('+', '/') plugin = plugin.replace('+', '/')
plugin = plugin.split('/') plugin = plugin.split('/')
req.parameters['algorithm'] = tuple(plugin) req.parameters['algorithm'] = tuple(plugin)
return current_app.senpy.analyse(req) results = current_app.senpy.analyse(req)
results.analysis = set(i.id for i in results.analysis)
return results
@api_blueprint.route('/evaluate/', methods=['POST', 'GET']) @api_blueprint.route('/evaluate/', methods=['POST', 'GET'])
......
...@@ -6,7 +6,6 @@ from future import standard_library ...@@ -6,7 +6,6 @@ from future import standard_library
standard_library.install_aliases() standard_library.install_aliases()
from . import plugins, api from . import plugins, api
from .plugins import Plugin, evaluate
from .models import Error, AggregatedEvaluation from .models import Error, AggregatedEvaluation
from .blueprints import api_blueprint, demo_blueprint, ns_blueprint from .blueprints import api_blueprint, demo_blueprint, ns_blueprint
...@@ -17,7 +16,6 @@ import copy ...@@ -17,7 +16,6 @@ import copy
import errno import errno
import logging import logging
from . import gsitk_compat from . import gsitk_compat
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
...@@ -25,6 +23,7 @@ logger = logging.getLogger(__name__) ...@@ -25,6 +23,7 @@ logger = logging.getLogger(__name__)
class Senpy(object): class Senpy(object):
""" Default Senpy extension for Flask """ """ Default Senpy extension for Flask """
def __init__(self, def __init__(self,
app=None, app=None,
plugin_folder=".", plugin_folder=".",
...@@ -50,7 +49,7 @@ class Senpy(object): ...@@ -50,7 +49,7 @@ class Senpy(object):
self.add_folder('plugins', from_root=True) self.add_folder('plugins', from_root=True)
else: else:
# Add only conversion plugins # Add only postprocessing plugins
self.add_folder(os.path.join('plugins', 'conversion'), self.add_folder(os.path.join('plugins', 'postprocessing'),
from_root=True) from_root=True)
self.app = app self.app = app
if app is not None: if app is not None:
...@@ -115,6 +114,7 @@ class Senpy(object): ...@@ -115,6 +114,7 @@ class Senpy(object):
raise AttributeError("Not a folder or does not exist: %s", folder) raise AttributeError("Not a folder or does not exist: %s", folder)
def _get_plugins(self, request): def _get_plugins(self, request):
'''Get a list of plugins that should be run for a specific request'''
if not self.analysis_plugins: if not self.analysis_plugins:
raise Error( raise Error(
status=404, status=404,
...@@ -132,33 +132,32 @@ class Senpy(object): ...@@ -132,33 +132,32 @@ class Senpy(object):
plugins = list() plugins = list()
for algo in algos: for algo in algos:
algo = algo.lower() algo = algo.lower()
if algo == 'conversion':
continue # Allow 'conversion' as a virtual plugin, which does nothing
if algo not in self._plugins: if algo not in self._plugins:
msg = ("The algorithm '{}' is not valid\n" msg = ("The algorithm '{}' is not valid\n"
"Valid algorithms: {}").format(algo, "Valid algorithms: {}").format(algo,
self._plugins.keys()) self._plugins.keys())
logger.debug(msg) logger.debug(msg)
raise Error( raise Error(status=404, message=msg)
status=404,
message=msg)
plugins.append(self._plugins[algo]) plugins.append(self._plugins[algo])
return plugins return plugins
def _process_entries(self, entries, req, plugins): def _process(self, req, pending, done=None):
""" """
Recursively process the entries with the first plugin in the list, and pass the results Recursively process the entries with the first plugin in the list, and pass the results
to the rest of the plugins. to the rest of the plugins.
""" """
if not plugins: done = done or []
for i in entries: if not pending:
yield i return req
return
plugin = plugins[0] plugin = pending[0]
specific_params = api.parse_extra_params(req, plugin) results = plugin.process(req, conversions_applied=done)
req.analysis.append({'plugin': plugin, if plugin not in results.analysis:
'parameters': specific_params}) results.analysis.append(plugin)
results = plugin.analyse_entries(entries, specific_params) return self._process(results, pending[1:], done)
for i in self._process_entries(results, req, plugins[1:]):
yield i
def install_deps(self): def install_deps(self):
plugins.install_deps(*self.plugins()) plugins.install_deps(*self.plugins())
...@@ -170,72 +169,14 @@ class Senpy(object): ...@@ -170,72 +169,14 @@ class Senpy(object):
by api.parse_call(). by api.parse_call().
""" """
logger.debug("analysing request: {}".format(request)) logger.debug("analysing request: {}".format(request))
entries = request.entries
request.entries = []
plugins = self._get_plugins(request) plugins = self._get_plugins(request)
results = request request.parameters = api.parse_extra_params(request, plugins)
for i in self._process_entries(entries, results, plugins): results = self._process(request, plugins)
results.entries.append(i) logger.debug("Got analysis result: {}".format(results))
self.convert_emotions(results) results = self.postprocess(results)
logger.debug("Returning analysis result: {}".format(results)) logger.debug("Returning post-processed result: {}".format(results))
results.analysis = [i['plugin'].id for i in results.analysis]
return results return results
def _get_datasets(self, request):
    """Prepare the datasets named in the request's ``dataset`` parameter.

    Args:
        request: object whose ``parameters`` mapping holds a
            comma-separated list of dataset names under ``dataset``.

    Returns:
        The value returned by ``DatasetManager.prepare_datasets`` for the
        requested names.

    Raises:
        Error: with status 404 when no dataset is available or a requested
            name is unknown; with status 400 when the ``dataset`` parameter
            is missing.
    """
    # ``self.datasets`` is a property that rebuilds its mapping from the
    # DatasetManager on every access, so read it only once.
    available = self.datasets
    if not available:
        raise Error(
            status=404,
            message=("No datasets found."
                     " Please verify DatasetManager"))
    requested = request.parameters.get('dataset', None)
    if requested is None:
        # Previously this fell through to ``None.split`` and surfaced as a
        # bare AttributeError instead of an API error.
        raise Error(
            status=400,
            message="No dataset was specified")
    datasets_name = requested.split(',')
    for dataset in datasets_name:
        if dataset not in available:
            logger.debug(("The dataset '{}' is not valid\n"
                          "Valid datasets: {}").format(dataset,
                                                       available.keys()))
            raise Error(
                status=404,
                message="The dataset '{}' is not valid".format(dataset))
    dm = gsitk_compat.DatasetManager()
    datasets = dm.prepare_datasets(datasets_name)
    return datasets
@property
def datasets(self):
    """Mapping of dataset name to its properties.

    The mapping is rebuilt from a fresh ``DatasetManager`` on every access
    and is also stored in ``self._dataset_list``. Each properties object
    gets an ``'@id'`` entry equal to its name. When a name shows up more
    than once, only the first occurrence is kept.
    """
    registry = self._dataset_list = {}
    manager = gsitk_compat.DatasetManager()
    for group in manager.get_datasets():
        for name in group:
            if name not in registry:
                info = group[name]
                info['@id'] = name
                registry[name] = info
    return registry
def evaluate(self, params):
    """Run the evaluation of the requested plugins on the requested datasets.

    ``params`` is attached to a new ``AggregatedEvaluation``, which is then
    used to resolve both the datasets and the plugins. Every evaluation
    produced is appended to ``evaluations``. The parameters are removed
    from the result unless they contain a ``with_parameters`` key.
    """
    logger.debug("evaluating request: {}".format(params))
    report = AggregatedEvaluation()
    report.parameters = params
    prepared = self._get_datasets(report)
    chosen = self._get_plugins(report)
    for evaluation in evaluate(chosen, prepared):
        report.evaluations.append(evaluation)
    keep_parameters = 'with_parameters' in report.parameters
    if not keep_parameters:
        del report.parameters
    logger.debug("Returning evaluation result: {}".format(report))
    return report
def _conversion_candidates(self, fromModel, toModel):
candidates = self.plugins(plugin_type='emotionConversionPlugin')
for candidate in candidates:
for pair in candidate.onyx__doesConversion:
logging.debug(pair)
if pair['onyx:conversionFrom'] == fromModel \
and pair['onyx:conversionTo'] == toModel:
yield candidate
def convert_emotions(self, resp): def convert_emotions(self, resp):
""" """
Conversion of all emotions in a response **in place**. Conversion of all emotions in a response **in place**.
...@@ -244,11 +185,12 @@ class Senpy(object): ...@@ -244,11 +185,12 @@ class Senpy(object):
Needless to say, this is far from an elegant solution, but it works. Needless to say, this is far from an elegant solution, but it works.
@todo refactor and clean up @todo refactor and clean up
""" """
plugins = [i['plugin'] for i in resp.analysis] plugins = resp.analysis
params = resp.parameters params = resp.parameters
toModel = params.get('emotionModel', None) toModel = params.get('emotionModel', None)
if not toModel: if not toModel:
return return resp
logger.debug('Asked for model: {}'.format(toModel)) logger.debug('Asked for model: {}'.format(toModel))
output = params.get('conversion', None) output = params.get('conversion', None)
...@@ -257,7 +199,8 @@ class Senpy(object): ...@@ -257,7 +199,8 @@ class Senpy(object):
try: try:
fromModel = plugin.get('onyx:usesEmotionModel', None) fromModel = plugin.get('onyx:usesEmotionModel', None)
candidates[plugin.id] = next(self._conversion_candidates(fromModel, toModel)) candidates[plugin.id] = next(self._conversion_candidates(fromModel, toModel))
logger.debug('Analysis plugin {} uses model: {}'.format(plugin.id, fromModel)) logger.debug('Analysis plugin {} uses model: {}'.format(
plugin.id, fromModel))
except StopIteration: except StopIteration:
e = Error(('No conversion plugin found for: ' e = Error(('No conversion plugin found for: '
'{} -> {}'.format(fromModel, toModel)), '{} -> {}'.format(fromModel, toModel)),
...@@ -266,6 +209,7 @@ class Senpy(object): ...@@ -266,6 +209,7 @@ class Senpy(object):
e.parameters = params e.parameters = params
raise e raise e
newentries = [] newentries = []
done = []
for i in resp.entries: for i in resp.entries:
if output == "full": if output == "full":
newemotions = copy.deepcopy(i.emotions) newemotions = copy.deepcopy(i.emotions)
...@@ -274,8 +218,7 @@ class Senpy(object): ...@@ -274,8 +218,7 @@ class Senpy(object):
for j in i.emotions: for j in i.emotions:
plugname = j['prov:wasGeneratedBy'] plugname = j['prov:wasGeneratedBy']
candidate = candidates[plugname] candidate = candidates[plugname]
resp.analysis.append({'plugin': candidate, done.append({'plugin': candidate, 'parameters': params})
'parameters': params})
for k in candidate.convert(j, fromModel, toModel, params): for k in candidate.convert(j, fromModel, toModel, params):
k.prov__wasGeneratedBy = candidate.id k.prov__wasGeneratedBy = candidate.id
if output == 'nested': if output == 'nested':
...@@ -284,12 +227,80 @@ class Senpy(object): ...@@ -284,12 +227,80 @@ class Senpy(object):
i.emotions = newemotions i.emotions = newemotions
newentries.append(i) newentries.append(i)
resp.entries = newentries resp.entries = newentries
return resp
# Generator over the plugins returned by
# ``self.plugins(plugin_type=plugins.EmotionConversion)``; a plugin is yielded
# when its ``can_convert(fromModel, toModel)`` is truthy.
# NOTE(review): indentation is not visible in this view, so it is unclear
# whether the ``can_convert`` check runs once per candidate (after the pair
# loop) or once per declared pair (which would yield the same candidate
# repeatedly and skip candidates with no pairs) -- confirm against the file.
def _conversion_candidates(self, fromModel, toModel):
candidates = self.plugins(plugin_type=plugins.EmotionConversion)
for candidate in candidates:
for pair in candidate.onyx__doesConversion:
# Debug trace of each declared conversion pair. This call goes through the
# root ``logging`` module, not the module-level ``logger``.
logging.debug(pair)
if candidate.can_convert(fromModel, toModel):
yield candidate
def postprocess(self, response):
    """Transform the results produced by the analysis plugins.

    Emotion conversion is applied first. Then every plugin of type
    ``plugins.PostProcessing`` whose ``check(response, response.analysis)``
    is truthy processes the response, each one receiving the output of the
    previous one. The final response is returned.
    """
    result = self.convert_emotions(response)
    postprocessors = self.plugins(plugin_type=plugins.PostProcessing)
    for postprocessor in postprocessors:
        if not postprocessor.check(result, result.analysis):
            continue
        result = postprocessor.process(result)
    return result
def _get_datasets(self, request):
    """Prepare the datasets named in the request's ``dataset`` parameter.

    Args:
        request: object whose ``parameters`` mapping holds a
            comma-separated list of dataset names under ``dataset``.

    Returns:
        The value returned by ``DatasetManager.prepare_datasets`` for the
        requested names.

    Raises:
        Error: with status 404 when no dataset is available or a requested
            name is unknown; with status 400 when the ``dataset`` parameter
            is missing.
    """
    # ``self.datasets`` is a property that rebuilds its mapping from the
    # DatasetManager on every access, so read it only once.
    available = self.datasets
    if not available:
        raise Error(
            status=404,
            message=("No datasets found."
                     " Please verify DatasetManager"))
    requested = request.parameters.get('dataset', None)
    if requested is None:
        # Previously this fell through to ``None.split`` and surfaced as a
        # bare AttributeError instead of an API error.
        raise Error(
            status=400,
            message="No dataset was specified")
    datasets_name = requested.split(',')
    for dataset in datasets_name:
        if dataset not in available:
            logger.debug(("The dataset '{}' is not valid\n"
                          "Valid datasets: {}").format(
                              dataset, available.keys()))
            raise Error(
                status=404,
                message="The dataset '{}' is not valid".format(dataset))
    dm = gsitk_compat.DatasetManager()
    datasets = dm.prepare_datasets(datasets_name)
    return datasets
@property
def datasets(self):
    """Mapping of dataset name to its properties.

    The mapping is rebuilt from a fresh ``DatasetManager`` on every access
    and is also stored in ``self._dataset_list``. Each properties object
    gets an ``'@id'`` entry equal to its name. When a name shows up more
    than once, only the first occurrence is kept.
    """
    found = self._dataset_list = {}
    manager = gsitk_compat.DatasetManager()
    for entry in manager.get_datasets():
        for dataset_id in entry:
            if dataset_id in found:
                # First occurrence wins; later duplicates are ignored.
                continue
            details = entry[dataset_id]
            details['@id'] = dataset_id
            found[dataset_id] = details
    return found
def evaluate(self, params):
    """Run the evaluation of the requested plugins on the requested datasets.

    Args:
        params: evaluation parameters; stored as the ``parameters`` of the
            result and used to resolve the datasets and the plugins.

    Returns:
        An ``AggregatedEvaluation`` whose ``evaluations`` holds one entry per
        evaluation produced. Its ``parameters`` are removed unless they
        contain a ``with_parameters`` key.
    """
    logger.debug("evaluating request: {}".format(params))
    results = AggregatedEvaluation()
    results.parameters = params
    datasets = self._get_datasets(results)
    # The local list must not be called ``plugins``: binding that name here
    # would shadow the imported ``plugins`` module for the whole function,
    # and ``plugins.evaluate`` would then be looked up on the list.
    selected = self._get_plugins(results)
    for evaluation in plugins.evaluate(selected, datasets):
        results.evaluations.append(evaluation)
    if 'with_parameters' not in results.parameters:
        del results.parameters
    logger.debug("Returning evaluation result: {}".format(results))
    return results
@property @property
def default_plugin(self): def default_plugin(self):
if not self._default or not self._default.is_activated: if not self._default or not self._default.is_activated:
candidates = self.plugins(plugin_type='analysisPlugin', candidates = self.plugins(
is_activated=True) plugin_type='analysisPlugin', is_activated=True)
if len(candidates) > 0: if len(candidates) > 0:
self._default = candidates[0] self._default = candidates[0]
else: else:
...@@ -299,7 +310,7 @@ class Senpy(object): ...@@ -299,7 +310,7 @@ class Senpy(object):
@default_plugin.setter @default_plugin.setter
def default_plugin(self, value): def default_plugin(self, value):
if isinstance(value, Plugin): if isinstance(value, plugins.Plugin):
if not value.is_activated: if not value.is_activated:
raise AttributeError('The default plugin has to be activated.') raise AttributeError('The default plugin has to be activated.')
self._default = value self._default = value
...@@ -351,7 +362,8 @@ class Senpy(object): ...@@ -351,7 +362,8 @@ class Senpy(object):
logger.info("Activating plugin: {}".format(plugin.name)) logger.info("Activating plugin: {}".format(plugin.name))
if sync or not getattr(plugin, 'async', True) or getattr(plugin, 'sync', False): if sync or not getattr(plugin, 'async', True) or getattr(
plugin, 'sync', False):
return self._activate(plugin) return self._activate(plugin)
else: else:
th = Thread(target=partial(self._activate, plugin)) th = Thread(target=partial(self._activate, plugin))
...@@ -374,7 +386,8 @@ class Senpy(object): ...@@ -374,7 +386,8 @@ class Senpy(object):
self._set_active(plugin, False) self._set_active(plugin, False)
if sync or not getattr(plugin, 'async', True) or not getattr(plugin, 'sync', False): if sync or not getattr(plugin, 'async', True) or not getattr(
plugin, 'sync', False):
self._deactivate(plugin) self._deactivate(plugin)
else: else:
th = Thread(target=partial(self._deactivate, plugin)) th = Thread(target=partial(self._deactivate, plugin))
......
from future import standard_library from future import standard_library
standard_library.install_aliases() standard_library.install_aliases()
from future.utils import with_metaclass from future.utils import with_metaclass
from functools import partial from functools import partial
...@@ -10,7 +9,6 @@ import os ...@@ -10,7 +9,6 @@ import os
import re import re
import pickle import pickle
import logging import logging
import copy
import pprint import pprint
import inspect import inspect
...@@ -26,7 +24,6 @@ from .. import api ...@@ -26,7 +24,6 @@ from .. import api
from .. import gsitk_compat from .. import gsitk_compat
from .. import testing from .. import testing
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
...@@ -46,16 +43,19 @@ class PluginMeta(models.BaseMeta): ...@@ -46,16 +43,19 @@ class PluginMeta(models.BaseMeta):
if doc: if doc:
attrs['description'] = doc attrs['description'] = doc
else: else:
logger.warn(('Plugin {} does not have a description. ' logger.warning(
'Please, add a short summary to help other developers').format(name)) ('Plugin {} does not have a description. '
'Please, add a short summary to help other developers'
).format(name))
cls = super(PluginMeta, mcs).__new__(mcs, name, bases, attrs) cls = super(PluginMeta, mcs).__new__(mcs, name, bases, attrs)
if alias in mcs._classes: if alias in mcs._classes:
if os.environ.get('SENPY_TESTING', ""): if os.environ.get('SENPY_TESTING', ""):
raise Exception(('The type of plugin {} already exists. ' raise Exception(
'Please, choose a different name').format(name)) ('The type of plugin {} already exists. '
'Please, choose a different name').format(name))
else: else:
logger.warn('Overloading plugin class: {}'.format(alias)) logger.warning('Overloading plugin class: {}'.format(alias))
mcs._classes[alias] = cls mcs._classes[alias] = cls
return cls return cls
...@@ -87,10 +87,12 @@ class Plugin(with_metaclass(PluginMeta, models.Plugin)): ...@@ -87,10 +87,12 @@ class Plugin(with_metaclass(PluginMeta, models.Plugin)):
if info: if info:
self.update(info) self.update(info)
self.validate() self.validate()
self.id = 'endpoint:plugins/{}_{}'.format(self['name'], self['version']) self.id = 'endpoint:plugins/{}_{}'.format(self['name'],
self['version'])
self.is_activated = False self.is_activated = False
self._lock = threading.Lock() self._lock = threading.Lock()
self._directory = os.path.abspath(os.path.dirname(inspect.getfile(self.__class__))) self._directory = os.path.abspath(
os.path.dirname(inspect.getfile(self.__class__)))
data_folder = data_folder or os.getcwd() data_folder = data_folder or os.getcwd()
subdir = os.path.join(data_folder, self.name) subdir = os.path.join(data_folder, self.name)
...@@ -118,7 +120,8 @@ class Plugin(with_metaclass(PluginMeta, models.Plugin)): ...@@ -118,7 +120,8 @@ class Plugin(with_metaclass(PluginMeta, models.Plugin)):
if x not in self: if x not in self: