__init__.py 9.23 KB
Newer Older
J. Fernando Sánchez's avatar
J. Fernando Sánchez committed
1 2
from future import standard_library
standard_library.install_aliases()
3

J. Fernando Sánchez's avatar
J. Fernando Sánchez committed
4
import os.path
J. Fernando Sánchez's avatar
J. Fernando Sánchez committed
5
import os
6
import pickle
J. Fernando Sánchez's avatar
J. Fernando Sánchez committed
7
import logging
8
import tempfile
J. Fernando Sánchez's avatar
J. Fernando Sánchez committed
9
import copy
10 11 12 13 14 15 16 17

import fnmatch
import inspect
import sys
import subprocess
import importlib
import yaml

18
from .. import models
19
from ..api import API_PARAMS
J. Fernando Sánchez's avatar
J. Fernando Sánchez committed
20 21 22

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

23

J. Fernando Sánchez's avatar
J. Fernando Sánchez committed
24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
def check_template(indict, template):
    """Check that ``indict`` matches the structure and values of ``template``.

    * dict vs dict: every key of the template must be present in ``indict``
      and its value must match recursively (extra keys in ``indict`` are
      allowed).
    * list vs list: both lists must have the same length and every template
      element must match at least one element of ``indict`` (order is not
      relevant).
    * anything else: the two values must compare equal.

    :param indict: the value under test.
    :param template: the expected shape/value.
    :raises models.Error: on the first mismatch found.
    :returns: ``None`` when ``indict`` matches.
    """
    if isinstance(template, dict) and isinstance(indict, dict):
        for k, v in template.items():
            if k not in indict:
                # Must raise (not return a message): the list branch below
                # relies on the exception to detect a non-matching element.
                raise models.Error('{} not in {}'.format(k, indict))
            check_template(indict[k], v)
    elif isinstance(template, list) and isinstance(indict, list):
        if len(indict) != len(template):
            raise models.Error('Different size for {} and {}'.format(indict, template))
        for e in template:
            found = False
            for i in indict:
                try:
                    check_template(i, e)
                except models.Error:
                    continue
                # One match is enough, no need to scan the remaining items.
                found = True
                break
            if not found:
                raise models.Error('{} not found in {}'.format(e, indict))
    else:
        if indict != template:
            raise models.Error('{} and {} are different'.format(indict, template))


J. Fernando Sánchez's avatar
J. Fernando Sánchez committed
48
class Plugin(models.Plugin):
    def __init__(self, info=None):
        """
        Provides a canonical name for plugins and serves as base for other
        kinds of plugins.

        :param info: mapping with the plugin configuration. ``name`` and
            ``version`` are used to build the plugin id, and every item is
            forwarded to the parent constructor as a keyword argument.
        :raises models.Error: if ``info`` is missing or empty.
        """
        if not info:
            raise models.Error(message=("You need to provide configuration "
                                        "information for the plugin."))
        logger.debug("Initialising {}".format(info))
        plugin_id = 'plugins/{}_{}'.format(info['name'], info['version'])
        super(Plugin, self).__init__(id=plugin_id, **info)
        self.is_activated = False

    def get_folder(self):
        """Return the directory of the file that defines this plugin's class."""
        return os.path.dirname(inspect.getfile(self.__class__))

    def activate(self):
        """Activation hook; does nothing in the base class."""
        pass

    def deactivate(self):
        """Deactivation hook; does nothing in the base class."""
        pass

    def test(self):
        """Run the plugin against its ``test_cases`` attribute.

        Each case is a mapping with the keys ``entry``, ``params`` and
        ``expected``. The entry is analysed with ``analyse_entry``, the
        results are compared with the expected value(s) through
        ``check_template`` and every result is then validated.

        :raises AttributeError: if the plugin defines no ``test_cases``.
        """
        if not hasattr(self, 'test_cases'):
            raise AttributeError(('Plugin {} [{}] does not have any defined '
                                  'test cases').format(self.id, inspect.getfile(self.__class__)))
        for case in self.test_cases:
            res = list(self.analyse_entry(models.Entry(case['entry']),
                                          case['params']))
            exp = case['expected']
            # A single expected value is treated as a one-element list.
            if not isinstance(exp, list):
                exp = [exp]
            check_template(res, exp)
            for r in res:
                r.validate()
J. Fernando Sánchez's avatar
J. Fernando Sánchez committed
85

86

J. Fernando Sánchez's avatar
J. Fernando Sánchez committed
87 88 89 90
# Alias: the base plugin class is also exposed under the name SenpyPlugin.
SenpyPlugin = Plugin


class AnalysisPlugin(Plugin):

    def analyse(self, *args, **kwargs):
        """Old-style entry point; subclasses override it or ``analyse_entry``.

        :raises NotImplementedError: always, in this base implementation.
        """
        # NotImplemented is a comparison sentinel, not an exception class;
        # calling it would fail with an unrelated TypeError.
        raise NotImplementedError(
            'Your method should implement either analyse or analyse_entry')

    def analyse_entry(self, entry, parameters):
        """ An implemented plugin should override this method.
        This base method is here to adapt old style plugins which only
        implement the *analyse* function.
        Note that this method may yield an annotated entry or a list of
        entries (e.g. in a tokenizer)
        """
        text = entry['text']
        # Work on a shallow copy so the caller's parameters are not modified.
        params = copy.copy(parameters)
        params['input'] = text
        results = self.analyse(**params)
        for i in results.entries:
            yield i

    def analyse_entries(self, entries, parameters):
        """Yield, in order, every result of ``analyse_entry`` for each entry."""
        for entry in entries:
            logger.debug('Analysing entry with plugin {}: {}'.format(self, entry))
            for result in self.analyse_entry(entry, parameters):
                yield result

116

J. Fernando Sánchez's avatar
J. Fernando Sánchez committed
117
class ConversionPlugin(Plugin):
    """Plugin subclass that adds no behaviour of its own on top of Plugin."""
119

120

121
class SentimentPlugin(models.SentimentPlugin, AnalysisPlugin):
    def __init__(self, info, *args, **kwargs):
        """Initialise the plugin and store its polarity bounds as floats.

        ``minPolarityValue`` defaults to 0 and ``maxPolarityValue`` to 1
        when ``info`` does not provide them.
        """
        super(SentimentPlugin, self).__init__(info, *args, **kwargs)
        lowest = info.get("minPolarityValue", 0)
        highest = info.get("maxPolarityValue", 1)
        self.minPolarityValue = float(lowest)
        self.maxPolarityValue = float(highest)
J. Fernando Sánchez's avatar
J. Fernando Sánchez committed
126

127

128
class EmotionPlugin(models.EmotionPlugin, AnalysisPlugin):
    def __init__(self, info, *args, **kwargs):
        """Initialise the plugin and store its emotion bounds as floats.

        ``minEmotionValue`` defaults to -1 and ``maxEmotionValue`` to 1
        when ``info`` does not provide them.
        """
        super(EmotionPlugin, self).__init__(info, *args, **kwargs)
        lowest = info.get("minEmotionValue", -1)
        highest = info.get("maxEmotionValue", 1)
        self.minEmotionValue = float(lowest)
        self.maxEmotionValue = float(highest)


135 136
class EmotionConversionPlugin(models.EmotionConversionPlugin, ConversionPlugin):
    """Combines models.EmotionConversionPlugin with ConversionPlugin; adds nothing else."""
J. Fernando Sánchez's avatar
J. Fernando Sánchez committed
137 138 139 140 141


class ShelfMixin(object):
    """Mixin that gives its host a pickle-backed dictionary, ``sh``.

    The host class is expected to behave like a mapping (``in``, ``[]`` and
    ``get`` are used) and to provide ``name`` and ``id`` attributes.
    """

    @property
    def sh(self):
        """Return the shelf dictionary, loading it from disk on first access.

        If the shelf file exists it is unpickled; otherwise an empty dict is
        used. A corrupted file is logged and the error re-raised unless the
        host has a truthy ``force_shelf`` option, in which case the empty
        dict is kept.
        """
        if not hasattr(self, '_sh') or self._sh is None:
            self.__dict__['_sh'] = {}
            if os.path.isfile(self.shelf_file):
                try:
                    # NOTE(review): pickle executes arbitrary code on load;
                    # the shelf file must come from a trusted location.
                    with open(self.shelf_file, 'rb') as f:
                        self.__dict__['_sh'] = pickle.load(f)
                except (IndexError, EOFError, pickle.UnpicklingError):
                    logger.warning('{} has a corrupted shelf file!'.format(self.id))
                    if not self.get('force_shelf', False):
                        raise
        return self._sh

    @sh.deleter
    def sh(self):
        """Remove the shelf file and the in-memory shelf, then call save()."""
        if os.path.isfile(self.shelf_file):
            os.remove(self.shelf_file)
            # The shelf may never have been loaded in this instance, so
            # tolerate a missing '_sh' instead of failing with KeyError.
            self.__dict__.pop('_sh', None)
        self.save()

    @property
    def shelf_file(self):
        """Return the path of the shelf file.

        When the host has no (or an empty) ``shelf_file`` entry, the path is
        set to ``<name>.p`` inside the folder given by the ``SENPY_DATA``
        environment variable, or the system temp folder if it is not set.
        """
        if 'shelf_file' not in self or not self['shelf_file']:
            sd = os.environ.get('SENPY_DATA', tempfile.gettempdir())
            self.shelf_file = os.path.join(sd, self.name + '.p')
        return self['shelf_file']

    def save(self):
        """Pickle the in-memory shelf to ``shelf_file``, if there is one."""
        logger.debug('saving pickle')
        if hasattr(self, '_sh') and self._sh is not None:
            with open(self.shelf_file, 'wb') as f:
                pickle.dump(self._sh, f)
172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208


# Plugin type used by pfilter when the caller does not pass ``plugin_type``;
# taken from the default declared in the API parameter definitions.
default_plugin_type = API_PARAMS['plugin_type']['default']


def pfilter(plugins, **kwargs):
    """ Filter plugins by different criteria

    :param plugins: a ``models.Plugins`` object, a dict of plugins (its
        values are used) or any other iterable of plugins.
    :param plugin_type: name of a class defined in this module (the first
        letter is upper-cased before the lookup). Only instances of that
        class are kept. Defaults to ``default_plugin_type``; a falsy value
        disables the type filter.
    :param kwargs: every remaining keyword is an attribute name and the
        value that attribute must be equal to.
    :raises models.Error: if ``plugin_type`` does not name a class of this
        module.
    :returns: a dict mapping plugin names to the plugins that matched.
    """
    if isinstance(plugins, models.Plugins):
        plugins = plugins.plugins
    elif isinstance(plugins, dict):
        plugins = plugins.values()
    ptype = kwargs.pop('plugin_type', default_plugin_type)
    logger.debug('#' * 100)
    logger.debug('ptype {}'.format(ptype))
    if ptype:
        ptype = ptype[0].upper() + ptype[1:]
        pclass = globals().get(ptype)
        # The name comes from the caller and could match any module global;
        # only classes can be used with isinstance, so reject the rest here
        # instead of failing later with a TypeError.
        if not inspect.isclass(pclass):
            raise models.Error('{} is not a valid type'.format(ptype))
        logger.debug('Class: {}'.format(pclass))
        candidates = [p for p in plugins if isinstance(p, pclass)]
    else:
        candidates = plugins

    logger.debug(candidates)

    def matches(plug):
        res = all(getattr(plug, k, None) == v for (k, v) in kwargs.items())
        logger.debug(
            "matching {} with {}: {}".format(plug.name, kwargs, res))
        return res

    if kwargs:
        candidates = filter(matches, candidates)
    return {p.name: p for p in candidates}
209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289


def validate_info(info):
    """Tell whether ``info`` contains every mandatory plugin field."""
    mandatory = ('name', 'module', 'description', 'version')
    for field in mandatory:
        if field not in info:
            return False
    return True


def load_module(name, root):
    """Import module ``name`` with ``root`` temporarily added to ``sys.path``.

    :param name: module name, as accepted by ``importlib.import_module``.
    :param root: folder appended to ``sys.path`` for the import.
    :returns: the imported module.
    :raises ImportError: if the module cannot be imported.
    """
    sys.path.append(root)
    try:
        return importlib.import_module(name)
    finally:
        # Undo the path change even when the import fails, so a broken
        # plugin does not leave its folder in sys.path.
        sys.path.remove(root)


def log_subprocess_output(process):
    """Log the output of ``process``: stdout at INFO level, stderr at ERROR.

    Both streams are read with ``communicate()``, so this call blocks until
    the process ends and the lines are logged afterwards. Reading stdout to
    the end before touching stderr (as a plain ``readline`` loop would) can
    block forever if the process fills the stderr pipe in the meantime.

    :param process: a ``subprocess.Popen`` object, normally created with
        both ``stdout`` and ``stderr`` set to ``subprocess.PIPE``.
    """
    out, err = process.communicate()
    # A stream that was not piped is reported as None. splitlines(True)
    # keeps the line endings, so each logged line includes its terminator.
    for line in (out or b'').splitlines(True):
        logger.info('%r', line)
    for line in (err or b'').splitlines(True):
        logger.error('%r', line)


def install_deps(*plugins):
    """Install with pip the ``requirements`` listed by each plugin info.

    :param plugins: plugin info mappings; those without a (non-empty)
        ``requirements`` entry are skipped.
    :raises models.Error: if pip exits with a non-zero status.
    """
    for info in plugins:
        requirements = info.get('requirements', [])
        if not requirements:
            continue
        # Run pip through the current interpreter so the packages end up in
        # the environment this code runs in. The old '--use-wheel' flag is
        # not passed: recent pip versions reject it and use wheels anyway.
        if sys.executable:
            pip_args = [sys.executable, '-m', 'pip', 'install']
        else:
            pip_args = ['pip', 'install']
        pip_args.extend(requirements)
        logger.info('Installing requirements: ' + str(requirements))
        process = subprocess.Popen(pip_args,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)
        log_subprocess_output(process)
        exitcode = process.wait()
        if exitcode != 0:
            raise models.Error("Dependencies not properly installed")


def load_plugin_from_info(info, root, validator=validate_info):
    """Build a plugin instance from its ``info`` definition.

    The requirements of the plugin are installed, the module named by
    ``info['module']`` is imported from ``root`` and the first class defined
    in that module is instantiated with ``info``.

    :param info: mapping with the plugin definition.
    :param root: folder the plugin module is imported from.
    :param validator: callable that tells whether ``info`` is usable.
    :returns: a ``(name, plugin)`` tuple, or ``(None, None)`` when the info
        is not valid or the module defines no class.
    """
    if not validator(info):
        logger.warning('The module info is not valid.\n\t{}'.format(info))
        return None, None
    module_name = info["module"]
    name = info["name"]

    install_deps(info)
    module = load_module(module_name, root)

    candidate = None
    for _, obj in inspect.getmembers(module):
        # Only classes defined in the module itself, not the imported ones.
        if inspect.isclass(obj) and inspect.getmodule(obj) == module:
            logger.debug(("Found plugin class:"
                          " {}@{}").format(obj, inspect.getmodule(obj)))
            candidate = obj
            break
    if not candidate:
        logger.debug("No valid plugin for: {}".format(module_name))
        # Always return a pair: callers unpack the result into two names.
        return None, None
    return name, candidate(info=info)


def load_plugin(root, filename):
    """Load the plugin described by the YAML file ``filename`` in ``root``.

    :param root: folder that contains the definition file; it is also the
        folder the plugin module is imported from.
    :param filename: name of the definition file.
    :returns: whatever ``load_plugin_from_info`` returns for the parsed
        definition.
    """
    fpath = os.path.join(root, filename)
    logger.debug("Loading plugin: {}".format(fpath))
    with open(fpath, 'r') as f:
        # NOTE(review): safe_load replaces yaml.load, which can build
        # arbitrary Python objects and needs an explicit Loader in recent
        # PyYAML versions. Definition files are assumed to contain only
        # plain YAML types - confirm no plugin relies on python-specific tags.
        info = yaml.safe_load(f)
    logger.debug("Info: {}".format(info))
    return load_plugin_from_info(info, root)


def load_plugins(folders, loader=load_plugin):
    """Walk ``folders`` and load every ``*.senpy`` definition found.

    ``loader`` is called with the containing folder and the file name and
    must return a ``(name, plugin)`` pair; pairs with a falsy member are
    skipped. Returns a dict that maps names to plugins.
    """
    found = {}
    for folder in folders:
        for dirpath, _subdirs, files in os.walk(folder):
            for fname in files:
                if not fnmatch.fnmatch(fname, '*.senpy'):
                    continue
                name, plugin = loader(dirpath, fname)
                if not (plugin and name):
                    continue
                found[name] = plugin
    return found