__init__.py 8.99 KB
Newer Older
J. Fernando Sánchez's avatar
J. Fernando Sánchez committed
1
2
from future import standard_library
standard_library.install_aliases()
3

J. Fernando Sánchez's avatar
J. Fernando Sánchez committed
4
import os.path
J. Fernando Sánchez's avatar
J. Fernando Sánchez committed
5
import os
6
import pickle
J. Fernando Sánchez's avatar
J. Fernando Sánchez committed
7
import logging
8
import tempfile
J. Fernando Sánchez's avatar
J. Fernando Sánchez committed
9
import copy
10
11
12
13
14
15
16
17

import fnmatch
import inspect
import sys
import subprocess
import importlib
import yaml

18
from .. import models
19
from ..api import API_PARAMS
J. Fernando Sánchez's avatar
J. Fernando Sánchez committed
20
21
22

logger = logging.getLogger(__name__)

23

J. Fernando Sánchez's avatar
J. Fernando Sánchez committed
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
def check_template(indict, template):
    """
    Recursively check that ``indict`` matches ``template``.

    * Two dicts match when every key of the template is present in
      ``indict`` and the corresponding values match. Keys that only
      appear in ``indict`` are ignored.
    * Two lists match when they have the same length and every element
      of the template matches at least one element of ``indict``
      (position is not taken into account).
    * Any other pair of values matches when they compare equal.

    :returns: None when the structures match.
    :raises models.Error: describing the first mismatch that was found.
    """
    if isinstance(template, dict) and isinstance(indict, dict):
        for k, v in template.items():
            if k not in indict:
                # A missing key is a mismatch like any other, so it has to
                # raise: a returned message would be discarded by every
                # caller, including the recursive call right below.
                raise models.Error('{} not in {}'.format(k, indict))
            check_template(indict[k], v)
    elif isinstance(template, list) and isinstance(indict, list):
        if len(indict) != len(template):
            raise models.Error('Different size for {} and {}'.format(indict, template))
        for e in template:
            for i in indict:
                try:
                    check_template(i, e)
                except models.Error:
                    # This candidate does not match; try the next one.
                    continue
                break
            else:
                # No element of indict matched this template element.
                raise models.Error('{} not found in {}'.format(e, indict))
    elif indict != template:
        raise models.Error('{} and {} are different'.format(indict, template))


J. Fernando Sánchez's avatar
J. Fernando Sánchez committed
48
class Plugin(models.Plugin):
    """
    Base class for every plugin in this module.

    It derives the plugin ``id`` from the ``name`` and ``version`` found in
    the plugin information and keeps track of the activation state.
    """

    def __init__(self, info=None):
        """
        Provides a canonical name for plugins and serves as base for other
        kinds of plugins.

        :param info: mapping with the plugin configuration. It must contain
            at least ``name`` and ``version``; all of its items are passed
            on to the parent constructor as keyword arguments.
        :raises models.Error: if ``info`` is missing or empty.
        """
        if not info:
            # The trailing space matters: the two literals are concatenated.
            raise models.Error(message=("You need to provide configuration "
                                        "information for the plugin."))
        logger.debug("Initialising {}".format(info))
        plugin_id = 'plugins/{}_{}'.format(info['name'], info['version'])
        super(Plugin, self).__init__(id=plugin_id, **info)
        self.is_activated = False

    def get_folder(self):
        """Return the directory of the file that defines this plugin's class."""
        return os.path.dirname(inspect.getfile(self.__class__))

    def activate(self):
        """Hook with no default behaviour; subclasses may override it."""
        pass

    def deactivate(self):
        """Hook with no default behaviour; subclasses may override it."""
        pass

    def test(self):
        """
        Run the plugin against every case in ``self.test_cases``.

        Each case is a mapping with the keys ``entry``, ``params`` and
        ``expected``. The entry is analysed with ``analyse_entry``, the
        results are compared with the expected value(s) through
        ``check_template`` and every result is then validated.
        """
        for case in self.test_cases:
            res = list(self.analyse_entry(models.Entry(case['entry']),
                                          case['params']))
            exp = case['expected']
            if not isinstance(exp, list):
                # A single expected value is shorthand for a one-item list.
                exp = [exp]
            check_template(res, exp)
            for r in res:
                r.validate()
J. Fernando Sánchez's avatar
J. Fernando Sánchez committed
81

82

J. Fernando Sánchez's avatar
J. Fernando Sánchez committed
83
84
85
86
# Second name bound to the very same class object as Plugin.
# NOTE(review): presumably kept so that older code importing SenpyPlugin
# keeps working -- confirm before removing.
SenpyPlugin = Plugin


class AnalysisPlugin(Plugin):
    """
    Base class for plugins that analyse entries.

    Subclasses implement either ``analyse`` (old style, whole request) or
    ``analyse_entry`` (one entry at a time).
    """

    def analyse(self, *args, **kwargs):
        """
        Old-style entry point; must be overridden unless ``analyse_entry`` is.

        :raises NotImplementedError: always, in this base implementation.
        """
        # NotImplementedError is the exception; NotImplemented is a constant
        # meant for rich comparisons and is not callable.
        raise NotImplementedError(
            'Your method should implement either analyse or analyse_entry')

    def analyse_entry(self, entry, parameters):
        """ An implemented plugin should override this method.
        This base method is here to adapt old style plugins which only
        implement the *analyse* function.
        Note that this method may yield an annotated entry or a list of
        entries (e.g. in a tokenizer)
        """
        text = entry['text']
        # Shallow copy so that the caller's parameters are not modified.
        params = copy.copy(parameters)
        params['input'] = text
        results = self.analyse(**params)
        for i in results.entries:
            yield i

    def analyse_entries(self, entries, parameters):
        """
        Analyse several entries, one after the other.

        Yields every result produced by ``analyse_entry`` for each entry,
        in order.
        """
        for entry in entries:
            logger.debug('Analysing entry with plugin {}: {}'.format(self, entry))
            for result in self.analyse_entry(entry, parameters):
                yield result

112

J. Fernando Sánchez's avatar
J. Fernando Sánchez committed
113
class ConversionPlugin(Plugin):
    """Plugin subclass that adds no attributes or methods of its own."""
115

116

117
class SentimentPlugin(models.SentimentPlugin, AnalysisPlugin):
    """Analysis plugin that stores the polarity bounds found in its info."""

    def __init__(self, info, *args, **kwargs):
        """
        Initialise the parent classes and read the polarity bounds.

        ``minPolarityValue`` defaults to 0 and ``maxPolarityValue`` to 1
        when they are absent from ``info``; both are stored as floats.
        """
        super(SentimentPlugin, self).__init__(info, *args, **kwargs)
        lower_bound = info.get("minPolarityValue", 0)
        upper_bound = info.get("maxPolarityValue", 1)
        self.minPolarityValue = float(lower_bound)
        self.maxPolarityValue = float(upper_bound)
J. Fernando Sánchez's avatar
J. Fernando Sánchez committed
122

123

124
class EmotionPlugin(models.EmotionPlugin, AnalysisPlugin):
    """Analysis plugin that stores the emotion value bounds found in its info."""

    def __init__(self, info, *args, **kwargs):
        """
        Initialise the parent classes and read the emotion value bounds.

        ``minEmotionValue`` defaults to -1 and ``maxEmotionValue`` to 1
        when they are absent from ``info``; both are stored as floats.
        """
        super(EmotionPlugin, self).__init__(info, *args, **kwargs)
        lower_bound = info.get("minEmotionValue", -1)
        upper_bound = info.get("maxEmotionValue", 1)
        self.minEmotionValue = float(lower_bound)
        self.maxEmotionValue = float(upper_bound)


131
132
class EmotionConversionPlugin(models.EmotionConversionPlugin, ConversionPlugin):
    """Combines models.EmotionConversionPlugin with ConversionPlugin; adds nothing else."""
J. Fernando Sánchez's avatar
J. Fernando Sánchez committed
133
134
135
136
137


class ShelfMixin(object):
    """
    Mixin that gives its host a dictionary (``sh``) persisted with pickle.

    The host class is used as a mapping (``in``, ``[]`` and ``get``) and is
    expected to provide ``name`` and ``id``.
    """

    @property
    def sh(self):
        """
        The shelf dictionary, loaded from ``shelf_file`` on first access.

        An empty dict is used when the file does not exist. If the file
        cannot be unpickled a warning is logged and the error is re-raised,
        unless the host has a truthy ``force_shelf`` item, in which case the
        empty dict is kept.
        """
        if not hasattr(self, '_sh') or self._sh is None:
            self.__dict__['_sh'] = {}
            if os.path.isfile(self.shelf_file):
                try:
                    # NOTE: unpickling can run arbitrary code; the shelf
                    # file must only ever come from a trusted location.
                    with open(self.shelf_file, 'rb') as f:
                        self.__dict__['_sh'] = pickle.load(f)
                except (IndexError, EOFError, pickle.UnpicklingError):
                    logger.warning('{} has a corrupted shelf file!'.format(self.id))
                    if not self.get('force_shelf', False):
                        raise
        return self._sh

    @sh.deleter
    def sh(self):
        """Remove the shelf file (if any) and forget the in-memory shelf."""
        if os.path.isfile(self.shelf_file):
            os.remove(self.shelf_file)
        # Always drop the cached dict, whether or not a file existed and
        # whether or not the shelf had been loaded; otherwise the call to
        # save() below would write the "deleted" data back to disk.
        self.__dict__.pop('_sh', None)
        self.save()

    @property
    def shelf_file(self):
        """
        Path of the pickle file.

        When the host has no (or an empty) ``shelf_file`` item, a default is
        assigned: ``<name>.p`` inside the directory given by the SENPY_DATA
        environment variable, or the system temporary directory.
        """
        if 'shelf_file' not in self or not self['shelf_file']:
            sd = os.environ.get('SENPY_DATA', tempfile.gettempdir())
            # NOTE(review): relies on the host class turning this attribute
            # assignment into the 'shelf_file' item -- confirm in models.
            self.shelf_file = os.path.join(sd, self.name + '.p')
        return self['shelf_file']

    def save(self):
        """Write the shelf to ``shelf_file``; does nothing if it was never loaded."""
        logger.debug('saving pickle')
        if hasattr(self, '_sh') and self._sh is not None:
            with open(self.shelf_file, 'wb') as f:
                pickle.dump(self._sh, f)
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204


# Plugin type used by pfilter when the caller does not pass `plugin_type`;
# taken from the default declared for that parameter in API_PARAMS.
default_plugin_type = API_PARAMS['plugin_type']['default']


def pfilter(plugins, **kwargs):
    """ Filter plugins by different criteria

    :param plugins: a ``models.Plugins`` instance, a dict whose values are
        plugins, or any iterable of plugins.
    :param plugin_type: optional keyword; name of a class defined in this
        module (the first letter is capitalised for the lookup). Only
        instances of that class are kept. Defaults to
        ``default_plugin_type``; a falsy value disables the type filter.
    :param kwargs: any other keyword is an ``attribute=value`` pair that a
        plugin has to satisfy (missing attributes count as None).
    :returns: dict mapping plugin names to the plugins that passed.
    :raises models.Error: if ``plugin_type`` does not name a class of this
        module.
    """
    if isinstance(plugins, models.Plugins):
        plugins = plugins.plugins
    elif isinstance(plugins, dict):
        plugins = plugins.values()
    ptype = kwargs.pop('plugin_type', default_plugin_type)
    logger.debug('#' * 100)
    logger.debug('ptype {}'.format(ptype))
    if ptype:
        ptype = ptype[0].upper() + ptype[1:]
        pclass = globals().get(ptype)
        # Module globals also hold things that are not classes (e.g.
        # API_PARAMS); using one of them in isinstance() would blow up with
        # a TypeError instead of reporting an invalid type.
        if not inspect.isclass(pclass):
            raise models.Error('{} is not a valid type'.format(ptype))
        logger.debug('Class: {}'.format(pclass))
        # A list (rather than a lazy filter object) keeps the debug output
        # below meaningful.
        candidates = [p for p in plugins if isinstance(p, pclass)]
    else:
        candidates = plugins

    logger.debug(candidates)

    def matches(plug):
        res = all(getattr(plug, k, None) == v for (k, v) in kwargs.items())
        logger.debug(
            "matching {} with {}: {}".format(plug.name, kwargs, res))
        return res

    if kwargs:
        candidates = filter(matches, candidates)
    return {p.name: p for p in candidates}
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285


def validate_info(info):
    """
    Tell whether ``info`` is a usable plugin description.

    :param info: the parsed content of a plugin definition file.
    :returns: True if ``info`` is a dict containing the keys ``name``,
        ``module``, ``description`` and ``version``; False otherwise.
    """
    # An empty YAML document parses to None and a scalar one to a string;
    # neither is a plugin description, and `in` on them would either raise
    # or do substring matching.
    if not isinstance(info, dict):
        return False
    return all(x in info for x in ('name', 'module', 'description', 'version'))


def load_module(name, root):
    """
    Import the module ``name``, looking for it in the folder ``root`` too.

    ``root`` is appended to ``sys.path`` only for the duration of the
    import.

    :param name: module name, as accepted by ``importlib.import_module``.
    :param root: folder that is temporarily added to ``sys.path``.
    :returns: the imported module.
    :raises: whatever the import itself raises.
    """
    sys.path.append(root)
    try:
        return importlib.import_module(name)
    finally:
        # Also runs when the import fails, so a broken plugin cannot leave
        # its folder behind in sys.path.
        sys.path.remove(root)


def log_subprocess_output(process):
    """
    Log everything a subprocess writes: stdout as INFO, stderr as ERROR.

    Blocks until the process has finished. Each line is logged with ``%r``
    and keeps its line terminator.

    :param process: a ``subprocess.Popen`` object, normally created with
        ``stdout=PIPE`` and ``stderr=PIPE``.
    """
    # communicate() drains both pipes at the same time. Reading stdout to
    # the end before touching stderr can deadlock: the child blocks once the
    # stderr pipe buffer is full and therefore never closes stdout.
    out, err = process.communicate()
    # A stream that was not piped comes back as None.
    for line in (out or b'').splitlines(True):
        logger.info('%r', line)
    for line in (err or b'').splitlines(True):
        logger.error('%r', line)


def install_deps(*plugins):
    """
    Install with pip the requirements declared by each plugin description.

    :param plugins: plugin info mappings; the optional ``requirements`` item
        is a list of pip requirement specifiers. Descriptions without
        requirements are skipped.
    :raises models.Error: if pip exits with a non-zero status.
    """
    for info in plugins:
        requirements = info.get('requirements', [])
        if not requirements:
            continue
        # Run pip through the current interpreter so that the packages land
        # in the environment this process is using, whatever `pip` happens
        # to be first in PATH. `--use-wheel` is not passed: wheels are
        # pip's default and recent pip versions reject the option.
        pip_args = [sys.executable, '-m', 'pip', 'install']
        pip_args.extend(requirements)
        logger.info('Installing requirements: ' + str(requirements))
        process = subprocess.Popen(pip_args,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)
        log_subprocess_output(process)
        exitcode = process.wait()
        if exitcode != 0:
            raise models.Error("Dependencies not properly installed")


def load_plugin_from_info(info, root, validator=validate_info):
    """
    Instantiate the plugin described by ``info``.

    The requirements of the plugin are installed, the module named by
    ``info['module']`` is imported from ``root`` and the first class defined
    in that module (in the name order given by ``inspect.getmembers``) is
    instantiated with ``info``.

    :param info: plugin description.
    :param root: folder that contains the plugin module.
    :param validator: callable that tells whether ``info`` is acceptable.
    :returns: a ``(name, plugin)`` tuple, or ``(None, None)`` when the
        description is not valid or the module defines no class.
    """
    if not validator(info):
        logger.warning('The module info is not valid.\n\t{}'.format(info))
        return None, None
    module = info["module"]
    name = info["name"]

    install_deps(info)
    tmp = load_module(module, root)

    candidate = None
    for _, obj in inspect.getmembers(tmp):
        # Only classes defined in the module itself, not the imported ones.
        if inspect.isclass(obj) and inspect.getmodule(obj) == tmp:
            logger.debug(("Found plugin class:"
                          " {}@{}").format(obj, inspect.getmodule(obj)))
            candidate = obj
            break
    if not candidate:
        logger.debug("No valid plugin for: {}".format(module))
        # Callers unpack two values, so the "nothing found" result has to be
        # a pair as well.
        return None, None
    plugin = candidate(info=info)
    return name, plugin


def load_plugin(root, filename):
    """
    Load the plugin described by the YAML file ``filename`` inside ``root``.

    :param root: folder that contains the definition file (and is used to
        import the plugin module).
    :param filename: name of the YAML definition file.
    :returns: whatever ``load_plugin_from_info`` returns for the parsed file.
    """
    fpath = os.path.join(root, filename)
    logger.debug("Loading plugin: {}".format(fpath))
    with open(fpath, 'r') as f:
        # safe_load only builds plain Python objects. A bare yaml.load(f)
        # can instantiate arbitrary objects from the file and is rejected
        # by current PyYAML releases, which require an explicit Loader.
        info = yaml.safe_load(f)
    logger.debug("Info: {}".format(info))
    return load_plugin_from_info(info, root)


def load_plugins(folders, loader=load_plugin):
    """
    Load every plugin found under the given folders.

    Each folder is walked recursively and ``loader(root, filename)`` is
    called for every file matching ``*.senpy``.

    :param folders: iterable of folders to search.
    :param loader: callable returning a ``(name, plugin)`` pair; a falsy
        result, name or plugin means that the file is skipped.
    :returns: dict mapping plugin names to plugin instances. When two files
        give the same name, the one loaded last is kept.
    """
    plugins = {}
    for search_folder in folders:
        for root, _dirnames, filenames in os.walk(search_folder):
            for filename in fnmatch.filter(filenames, '*.senpy'):
                loaded = loader(root, filename)
                if not loaded:
                    # Tolerate loaders that return None rather than a pair.
                    continue
                name, plugin = loaded
                if plugin and name:
                    plugins[name] = plugin
    return plugins