__init__.py 9.23 KB
Newer Older
J. Fernando Sánchez's avatar
J. Fernando Sánchez committed
1
2
from future import standard_library
standard_library.install_aliases()
3

J. Fernando Sánchez's avatar
J. Fernando Sánchez committed
4
import os.path
J. Fernando Sánchez's avatar
J. Fernando Sánchez committed
5
import os
6
import pickle
J. Fernando Sánchez's avatar
J. Fernando Sánchez committed
7
import logging
8
import tempfile
J. Fernando Sánchez's avatar
J. Fernando Sánchez committed
9
import copy
10
11
12
13
14
15
16
17

import fnmatch
import inspect
import sys
import subprocess
import importlib
import yaml

18
from .. import models
19
from ..api import API_PARAMS
J. Fernando Sánchez's avatar
J. Fernando Sánchez committed
20
21
22

# Module-level logger used by the plugin classes and loader helpers below.
logger = logging.getLogger(__name__)

23

J. Fernando Sánchez's avatar
J. Fernando Sánchez committed
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
def check_template(indict, template):
    """Check that ``indict`` matches ``template``.

    * Two dictionaries match when every key of ``template`` is present in
      ``indict`` and the corresponding values match recursively. Extra keys
      in ``indict`` are ignored.
    * Two lists match when they have the same length and every element of
      ``template`` matches at least one element of ``indict`` (order is not
      significant).
    * Any other pair of values matches when they compare equal.

    :param indict: the value under test.
    :param template: the expected structure.
    :returns: ``None`` when ``indict`` matches.
    :raises models.Error: describing the first mismatch that was found.
    """
    if isinstance(template, dict) and isinstance(indict, dict):
        for key, expected in template.items():
            if key not in indict:
                # A missing key is a mismatch like any other: raise so that
                # callers (and the list branch below) actually notice it.
                raise models.Error('{} not in {}'.format(key, indict))
            check_template(indict[key], expected)
    elif isinstance(template, list) and isinstance(indict, list):
        if len(indict) != len(template):
            raise models.Error('Different size for {} and {}'.format(indict, template))
        for expected in template:
            for candidate in indict:
                try:
                    check_template(candidate, expected)
                except models.Error:
                    continue
                # First matching candidate is enough for this template item.
                break
            else:
                raise models.Error('{} not found in {}'.format(expected, indict))
    else:
        if indict != template:
            raise models.Error('{} and {} are different'.format(indict, template))


J. Fernando Sánchez's avatar
J. Fernando Sánchez committed
48
class Plugin(models.Plugin):
    """Base class shared by every kind of plugin defined in this module."""

    def __init__(self, info=None):
        """
        Provides a canonical name for plugins and serves as base for other
        kinds of plugins.

        :param info: mapping with the plugin configuration. ``name`` and
            ``version`` are required to build the plugin id; every item is
            forwarded to ``models.Plugin`` as a keyword argument.
        :raises models.Error: if ``info`` is missing or empty.
        """
        if not info:
            raise models.Error(message=("You need to provide configuration "
                                        "information for the plugin."))
        logger.debug("Initialising {}".format(info))
        plugin_id = 'plugins/{}_{}'.format(info['name'], info['version'])
        super(Plugin, self).__init__(id=plugin_id, **info)
        self.is_activated = False

    def get_folder(self):
        """Return the directory of the file that defines this plugin's class."""
        return os.path.dirname(inspect.getfile(self.__class__))

    def activate(self):
        """Hook with no default behaviour; subclasses may override it."""
        pass

    def deactivate(self):
        """Hook with no default behaviour; subclasses may override it."""
        pass

    def test(self):
        """Run the plugin against its own ``test_cases``.

        Each case is a mapping with the keys ``entry``, ``params`` and
        ``expected``. The entry is analysed with ``self.analyse_entry`` (not
        defined in this class; it has to come from a subclass), the results
        are compared with ``expected`` through ``check_template`` and every
        result is then validated.

        :raises AttributeError: if the plugin does not define ``test_cases``.
        """
        if not hasattr(self, 'test_cases'):
            raise AttributeError(('Plugin {} [{}] does not have any defined '
                                  'test cases').format(self.id, inspect.getfile(self.__class__)))
        for case in self.test_cases:
            res = list(self.analyse_entry(models.Entry(case['entry']),
                                          case['params']))
            exp = case['expected']
            # A single expected result is treated as a one-element list.
            if not isinstance(exp, list):
                exp = [exp]
            check_template(res, exp)
            for r in res:
                r.validate()
J. Fernando Sánchez's avatar
J. Fernando Sánchez committed
85

86

J. Fernando Sánchez's avatar
J. Fernando Sánchez committed
87
88
89
90
# Second name bound to the same class as Plugin
# (presumably kept for backwards compatibility; confirm with callers).
SenpyPlugin = Plugin


class AnalysisPlugin(Plugin):
    """Base class for plugins that analyse entries.

    Subclasses override either ``analyse`` (old style, whole request) or
    ``analyse_entry`` (one entry at a time).
    """

    def analyse(self, *args, **kwargs):
        """Old-style entry point; must be overridden if it is going to be used.

        :raises NotImplementedError: always, in this base implementation.
        """
        # NotImplemented is a constant, not an exception: calling it raised
        # an unrelated TypeError. NotImplementedError is the proper signal.
        raise NotImplementedError(
            'Your method should implement either analyse or analyse_entry')

    def analyse_entry(self, entry, parameters):
        """ An implemented plugin should override this method.
        This base method is here to adapt old style plugins which only
        implement the *analyse* function.
        Note that this method may yield an annotated entry or a list of
        entries (e.g. in a tokenizer)

        :param entry: entry to analyse; its ``text`` item is passed to
            ``analyse`` as the ``input`` keyword argument.
        :param parameters: mapping of parameters; it is shallow-copied, so
            the caller's mapping is not modified.
        """
        text = entry['text']
        params = copy.copy(parameters)
        params['input'] = text
        results = self.analyse(**params)
        for i in results.entries:
            yield i

    def analyse_entries(self, entries, parameters):
        """Yield the results of ``analyse_entry`` for each entry, in order."""
        for entry in entries:
            # Lazy %-style arguments: the message is only built when debug
            # logging is enabled.
            logger.debug('Analysing entry with plugin %s: %s', self, entry)
            for result in self.analyse_entry(entry, parameters):
                yield result

116

J. Fernando Sánchez's avatar
J. Fernando Sánchez committed
117
class ConversionPlugin(Plugin):
    """Base class for conversion plugins; adds nothing on top of Plugin."""
119

120

121
class SentimentPlugin(models.SentimentPlugin, AnalysisPlugin):
    """Analysis plugin that records the polarity range taken from ``info``."""

    def __init__(self, info, *args, **kwargs):
        """Initialise the plugin and store its polarity bounds as floats.

        ``minPolarityValue`` defaults to 0 and ``maxPolarityValue`` to 1 when
        ``info`` does not provide them.
        """
        super(SentimentPlugin, self).__init__(info, *args, **kwargs)
        bounds = (("minPolarityValue", 0), ("maxPolarityValue", 1))
        for attribute, fallback in bounds:
            setattr(self, attribute, float(info.get(attribute, fallback)))
J. Fernando Sánchez's avatar
J. Fernando Sánchez committed
126

127

128
class EmotionPlugin(models.EmotionPlugin, AnalysisPlugin):
    """Analysis plugin that records the emotion value range taken from ``info``."""

    def __init__(self, info, *args, **kwargs):
        """Initialise the plugin and store its emotion bounds as floats.

        ``minEmotionValue`` defaults to -1 and ``maxEmotionValue`` to 1 when
        ``info`` does not provide them.
        """
        super(EmotionPlugin, self).__init__(info, *args, **kwargs)
        bounds = (("minEmotionValue", -1), ("maxEmotionValue", 1))
        for attribute, fallback in bounds:
            setattr(self, attribute, float(info.get(attribute, fallback)))


135
136
class EmotionConversionPlugin(models.EmotionConversionPlugin, ConversionPlugin):
    """Combines models.EmotionConversionPlugin with ConversionPlugin; adds nothing of its own."""
J. Fernando Sánchez's avatar
J. Fernando Sánchez committed
137
138
139
140
141


class ShelfMixin(object):
    """Mixin that adds a pickle-backed dictionary, available as ``self.sh``.

    The host class must behave like a mapping (``in``, ``self[...]`` and
    ``self.get`` are used here) and provide the ``name`` and ``id``
    attributes.
    """

    @property
    def sh(self):
        """Return the shelf dictionary, loading it from disk on first access.

        When the shelf file cannot be unpickled a warning is logged and the
        error is re-raised, unless the ``force_shelf`` option is set, in
        which case an empty dictionary is used instead.
        """
        if not hasattr(self, '_sh') or self._sh is None:
            self.__dict__['_sh'] = {}
            if os.path.isfile(self.shelf_file):
                try:
                    # NOTE(review): unpickling can execute arbitrary code;
                    # the shelf file must come from a trusted location.
                    # The context manager closes the handle even on failure.
                    with open(self.shelf_file, 'rb') as shelf:
                        self.__dict__['_sh'] = pickle.load(shelf)
                except (IndexError, EOFError, pickle.UnpicklingError):
                    logger.warning('{} has a corrupted shelf file!'.format(self.id))
                    if not self.get('force_shelf', False):
                        raise
        return self._sh

    @sh.deleter
    def sh(self):
        """Remove the shelf file and the in-memory copy, then call ``save``."""
        if os.path.isfile(self.shelf_file):
            os.remove(self.shelf_file)
            # The shelf may never have been loaded in this instance, so a
            # missing '_sh' entry is not an error.
            self.__dict__.pop('_sh', None)
        # NOTE(review): when no file exists the in-memory shelf is kept and
        # written out by save(); confirm this is the intended behaviour.
        self.save()

    @property
    def shelf_file(self):
        """Return the shelf path, computing and storing a default if unset.

        The default is ``<name>.p`` inside the directory named by the
        ``SENPY_DATA`` environment variable, or inside the system temporary
        directory when that variable is not defined.
        """
        if 'shelf_file' not in self or not self['shelf_file']:
            sd = os.environ.get('SENPY_DATA', tempfile.gettempdir())
            # This property has no setter: the assignment relies on the host
            # class storing attributes as items (assumption; verify against
            # the model's __setattr__).
            self.shelf_file = os.path.join(sd, self.name + '.p')
        return self['shelf_file']

    def save(self):
        """Write the shelf to ``shelf_file``; does nothing if it is not loaded."""
        logger.debug('saving pickle')
        if hasattr(self, '_sh') and self._sh is not None:
            with open(self.shelf_file, 'wb') as f:
                pickle.dump(self._sh, f)
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208


# Plugin type that pfilter() falls back to when the caller passes no
# 'plugin_type'; read from the API parameter definitions.
default_plugin_type = API_PARAMS['plugin_type']['default']


def pfilter(plugins, **kwargs):
    """ Filter plugins by different criteria

    :param plugins: a ``models.Plugins`` instance, a dictionary whose values
        are plugins, or any iterable of plugins.
    :param plugin_type: optional keyword with the name of a class defined in
        this module (its first letter is upper-cased before the lookup).
        Defaults to ``default_plugin_type``; a false value disables the
        filter by type.
    :param kwargs: every other keyword is an attribute name/value pair that
        a plugin must match. A plugin lacking the attribute is treated as if
        its value were ``None``.
    :returns: dictionary that maps plugin names to the selected plugins.
    :raises models.Error: if ``plugin_type`` does not name a class of this
        module.
    """
    if isinstance(plugins, models.Plugins):
        plugins = plugins.plugins
    elif isinstance(plugins, dict):
        plugins = plugins.values()
    ptype = kwargs.pop('plugin_type', default_plugin_type)
    logger.debug('#' * 100)
    logger.debug('ptype {}'.format(ptype))
    if ptype:
        ptype = ptype[0].upper() + ptype[1:]
        pclass = globals().get(ptype)
        # Only classes are valid types. Any other global (or a missing one)
        # would otherwise make isinstance() fail later with a TypeError.
        if not inspect.isclass(pclass):
            raise models.Error('{} is not a valid type'.format(ptype))
        logger.debug('Class: {}'.format(pclass))
        candidates = [plug for plug in plugins if isinstance(plug, pclass)]
    else:
        candidates = plugins

    logger.debug(candidates)

    def matches(plug):
        res = all(getattr(plug, k, None) == v for (k, v) in kwargs.items())
        logger.debug(
            "matching {} with {}: {}".format(plug.name, kwargs, res))
        return res

    if kwargs:
        candidates = filter(matches, candidates)
    return {p.name: p for p in candidates}
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289


def validate_info(info):
    """Tell whether ``info`` is a usable plugin description.

    :param info: object loaded from a plugin definition file.
    :returns: ``True`` if ``info`` is a mapping that contains the keys
        ``name``, ``module``, ``description`` and ``version``; ``False``
        otherwise.
    """
    # An empty definition file yields None and a scalar one yields a string;
    # on a string, ``in`` would perform a substring search. Only mapping-like
    # objects are accepted.
    if not hasattr(info, 'keys'):
        return False
    required = ('name', 'module', 'description', 'version')
    return all(key in info for key in required)


def load_module(name, root):
    """Import module ``name`` with ``root`` temporarily added to ``sys.path``.

    :param name: name of the module to import.
    :param root: directory appended to ``sys.path`` during the import.
    :returns: the imported module.
    :raises ImportError: if the module cannot be imported. ``sys.path`` is
        restored in that case too.
    """
    sys.path.append(root)
    try:
        return importlib.import_module(name)
    finally:
        # Remove the entry appended above (the last occurrence), so that a
        # pre-existing ``root`` entry keeps its position in the search path.
        for index in range(len(sys.path) - 1, -1, -1):
            if sys.path[index] == root:
                del sys.path[index]
                break


def log_subprocess_output(process):
    """Forward the output of ``process`` to the module logger, line by line.

    Lines read from stdout are logged at INFO level and lines read from
    stderr at ERROR level, using their ``repr``. The call returns once both
    streams have reached end of file.

    :param process: a ``subprocess.Popen`` object. A stream that was not
        redirected to a pipe (``None``) is skipped.
    """
    import threading

    def _forward(stream, level):
        # Log every line of ``stream`` at ``level`` until end of file.
        if stream is None:
            return
        while True:
            line = stream.readline()
            # An empty read means EOF, for both bytes and text streams.
            if not line:
                break
            logger.log(level, '%r', line)

    # Both pipes are drained at the same time. Reading stdout to the end
    # before touching stderr blocks forever when the child fills the stderr
    # pipe while it is still writing.
    stderr_reader = threading.Thread(target=_forward,
                                     args=(process.stderr, logging.ERROR))
    stderr_reader.daemon = True
    stderr_reader.start()
    _forward(process.stdout, logging.INFO)
    stderr_reader.join()


def install_deps(*plugins):
    """Install with pip the requirements declared by each plugin description.

    :param plugins: plugin descriptions (mappings). The optional
        ``requirements`` item lists the requirement specifiers to install.
    :raises models.Error: if pip exits with a non-zero status.
    """
    for info in plugins:
        requirements = info.get('requirements', [])
        if not requirements:
            continue
        # Run pip through the current interpreter so that the packages end
        # up in the environment that will import the plugin. The
        # '--use-wheel' flag is no longer passed: pip removed the option and
        # rejects it, and wheels are used by default.
        if sys.executable:
            pip_args = [sys.executable, '-m', 'pip', 'install']
        else:
            pip_args = ['pip', 'install']
        pip_args.extend(requirements)
        logger.info('Installing requirements: ' + str(requirements))
        process = subprocess.Popen(pip_args,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)
        log_subprocess_output(process)
        exitcode = process.wait()
        if exitcode != 0:
            raise models.Error("Dependencies not properly installed")


def load_plugin_from_info(info, root, validator=validate_info):
    """Instantiate the plugin described by ``info``.

    The requirements of the plugin are installed, its module is imported
    from ``root`` and the first class defined in that module (in the order
    given by ``inspect.getmembers``) is instantiated with ``info``.

    :param info: plugin description; ``module`` and ``name`` are read here.
    :param root: directory that contains the plugin module.
    :param validator: callable that decides whether ``info`` is acceptable.
    :returns: a ``(name, plugin)`` tuple, or ``(None, None)`` if the
        description is not valid or the module defines no class.
    """
    if not validator(info):
        logger.warning('The module info is not valid.\n\t{}'.format(info))
        return None, None
    module = info["module"]
    name = info["name"]

    install_deps(info)
    tmp = load_module(module, root)

    candidate = None
    for _, obj in inspect.getmembers(tmp):
        # Classes merely imported by the module are skipped.
        if inspect.isclass(obj) and inspect.getmodule(obj) == tmp:
            logger.debug(("Found plugin class:"
                          " {}@{}").format(obj, inspect.getmodule(obj)))
            candidate = obj
            break
    if not candidate:
        logger.debug("No valid plugin for: {}".format(module))
        # Callers unpack the result, so the shape has to be a pair here too.
        return None, None
    plugin = candidate(info=info)
    return name, plugin


def load_plugin(root, filename):
    """Load the plugin described by the definition file ``root/filename``.

    :param root: directory of the definition file; the plugin module is
        imported from the same directory.
    :param filename: name of the YAML definition file.
    :returns: whatever ``load_plugin_from_info`` returns for the parsed file.
    """
    fpath = os.path.join(root, filename)
    logger.debug("Loading plugin: {}".format(fpath))
    with open(fpath, 'r') as f:
        # safe_load builds plain Python data only. yaml.load without an
        # explicit Loader can construct arbitrary objects and is rejected by
        # current PyYAML releases.
        info = yaml.safe_load(f)
    logger.debug("Info: {}".format(info))
    return load_plugin_from_info(info, root)


def load_plugins(folders, loader=load_plugin):
    """Collect the plugins defined by ``*.senpy`` files under ``folders``.

    Every folder is walked recursively and ``loader(directory, filename)``
    is called for each matching file. Results whose name or plugin is false
    are discarded; a later plugin replaces an earlier one with the same name.

    :param folders: iterable of directories to search.
    :param loader: callable that returns a ``(name, plugin)`` pair.
    :returns: dictionary that maps plugin names to plugins.
    """
    found = {}
    for folder in folders:
        for current_dir, _subdirs, files in os.walk(folder):
            descriptors = fnmatch.filter(files, '*.senpy')
            for descriptor in descriptors:
                plugin_name, instance = loader(current_dir, descriptor)
                if not (instance and plugin_name):
                    continue
                found[plugin_name] = instance
    return found