Commit 435d1076 authored by J. Fernando Sánchez
Browse files

Add headers and minor fixes

parent c4321dc5
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright 2014 J. Fernando Sánchez Rada - Grupo de Sistemas Inteligentes
# DIT, UPM
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# Copyright 2014 Grupo de Sistemas Inteligentes (GSI) DIT, UPM
#
# http://www.apache.org/licenses/LICENSE-2.0
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# Unless required by applicable law or agreed to in writing, software
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Blueprints for Senpy
"""
......@@ -24,7 +25,7 @@ from . import api
from .version import __version__
from functools import wraps
from .gsitk_compat import GSITK_AVAILABLE
from .gsitk_compat import GSITK_AVAILABLE, datasets
import logging
import json
......@@ -272,8 +273,6 @@ def plugin(plugin):
@api_blueprint.route('/datasets/', methods=['POST', 'GET'])
@basic_api
def datasets():
sp = current_app.senpy
datasets = sp.datasets
def get_datasets():
dic = Datasets(datasets=list(datasets.values()))
return dic
#
# Copyright 2014 Grupo de Sistemas Inteligentes (GSI) DIT, UPM
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import print_function
import sys
......
#
# Copyright 2014 Grupo de Sistemas Inteligentes (GSI) DIT, UPM
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import requests
import logging
from . import models
......
#
# Copyright 2014 Grupo de Sistemas Inteligentes (GSI) DIT, UPM
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
Main class for Senpy.
It orchestrates plugin (de)activation and analysis.
......@@ -274,36 +289,16 @@ class Senpy(object):
return response
def _get_datasets(self, request):
if not self.datasets:
raise Error(
status=404,
message=("No datasets found."
" Please verify DatasetManager"))
datasets_name = request.parameters.get('dataset', None).split(',')
for dataset in datasets_name:
if dataset not in self.datasets:
if dataset not in gsitk_compat.datasets:
logger.debug(("The dataset '{}' is not valid\n"
"Valid datasets: {}").format(
dataset, self.datasets.keys()))
dataset, gsitk_compat.datasets.keys()))
raise Error(
status=404,
message="The dataset '{}' is not valid".format(dataset))
dm = gsitk_compat.DatasetManager()
datasets = dm.prepare_datasets(datasets_name)
return datasets
@property
def datasets(self):
self._dataset_list = {}
dm = gsitk_compat.DatasetManager()
for item in dm.get_datasets():
for key in item:
if key in self._dataset_list:
continue
properties = item[key]
properties['@id'] = key
self._dataset_list[key] = properties
return self._dataset_list
return datasets_name
def evaluate(self, params):
logger.debug("evaluating request: {}".format(params))
......
#
# Copyright 2014 Grupo de Sistemas Inteligentes (GSI) DIT, UPM
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import logging
import os
from pkg_resources import parse_version, get_distribution, DistributionNotFound
......@@ -17,15 +34,34 @@ try:
gsitk_distro = get_distribution("gsitk")
GSITK_VERSION = parse_version(gsitk_distro.version)
if not os.environ.get('DATA_PATH'):
os.environ['DATA_PATH'] = os.environ.get('SENPY_DATA', 'senpy_data')
from gsitk.datasets.datasets import DatasetManager
from gsitk.evaluation.evaluation import Evaluation as Eval # noqa: F401
from gsitk.evaluation.evaluation import EvalPipeline # noqa: F401
from sklearn.pipeline import Pipeline
modules = locals()
GSITK_AVAILABLE = True
datasets = {}
manager = DatasetManager()
for item in manager.get_datasets():
for key in item:
if key in datasets:
continue
properties = item[key]
properties['@id'] = key
datasets[key] = properties
def prepare(ds, *args, **kwargs):
    """Delegate dataset preparation to the module-level ``manager``.

    All positional and keyword arguments are forwarded unchanged to
    ``manager.prepare_datasets`` and its result is returned as-is.
    """
    prepared = manager.prepare_datasets(ds, *args, **kwargs)
    return prepared
except (DistributionNotFound, ImportError) as err:
logger.debug('Error importing GSITK: {}'.format(err))
logger.warning(IMPORTMSG)
GSITK_AVAILABLE = False
GSITK_VERSION = ()
DatasetManager = Eval = Pipeline = raise_exception
DatasetManager = Eval = Pipeline = prepare = raise_exception
datasets = {}
#
# Copyright 2014 Grupo de Sistemas Inteligentes (GSI) DIT, UPM
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
'''
Meta-programming for the models.
'''
......
#
# Copyright 2014 Grupo de Sistemas Inteligentes (GSI) DIT, UPM
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
'''
Senpy Models.
......
#!/usr/local/bin/python
# -*- coding: utf-8 -*-
#
# Copyright 2014 Grupo de Sistemas Inteligentes (GSI) DIT, UPM
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from future import standard_library
standard_library.install_aliases()
......@@ -45,7 +61,7 @@ class PluginMeta(models.BaseMeta):
plugin_type.add(name)
alias = attrs.get('name', name).lower()
attrs['_plugin_type'] = plugin_type
logger.debug('Adding new plugin class', name, bases, attrs, plugin_type)
logger.debug('Adding new plugin class: %s %s %s %s', name, bases, attrs, plugin_type)
attrs['name'] = alias
if 'description' not in attrs:
doc = attrs.get('__doc__', None)
......@@ -94,7 +110,7 @@ class Plugin(with_metaclass(PluginMeta, models.Plugin)):
Provides a canonical name for plugins and serves as base for other
kinds of plugins.
"""
logger.debug("Initialising {}".format(info))
logger.debug("Initialising %s", info)
super(Plugin, self).__init__(**kwargs)
if info:
self.update(info)
......@@ -164,8 +180,7 @@ class Plugin(with_metaclass(PluginMeta, models.Plugin)):
def process_entries(self, entries, activity):
for entry in entries:
self.log.debug('Processing entry with plugin {}: {}'.format(
self, entry))
self.log.debug('Processing entry with plugin %s: %s', self, entry)
results = self.process_entry(entry, activity)
if inspect.isgenerator(results):
for result in results:
......@@ -347,6 +362,9 @@ class Evaluable(Plugin):
def evaluate_func(self, X, activity=None):
    """Placeholder that concrete evaluable plugins must override.

    Args:
        X: unused by this base implementation.
        activity: unused by this base implementation.

    Raises:
        NotImplementedError: always, with the same message as before.
    """
    # NotImplementedError is the idiomatic signal for an unimplemented hook
    # and is still an Exception subclass, so existing `except Exception`
    # handlers keep working.
    raise NotImplementedError('Implement the evaluate_func function')
def evaluate(self, *args, **kwargs):
    """Evaluate this single plugin.

    Wraps ``self`` in a one-element list and forwards it, together with any
    extra positional and keyword arguments, to the ``evaluate`` callable
    visible from this scope; its result is returned unchanged.
    """
    plugins = [self]
    return evaluate(plugins, *args, **kwargs)
class SentimentPlugin(Analyser, Evaluable, models.SentimentPlugin):
'''
......@@ -831,6 +849,9 @@ def evaluate(plugins, datasets, **kwargs):
if not hasattr(plug, 'as_pipe'):
raise models.Error('Plugin {} cannot be evaluated'.format(plug.name))
if not isinstance(datasets, dict):
datasets = gsitk_compat.prepare(datasets, download=True)
tuples = list(product(plugins, datasets))
missing = []
for (p, d) in tuples:
......@@ -844,12 +865,12 @@ def evaluate(plugins, datasets, **kwargs):
new_ev = evaluations_to_JSONLD(results, **kwargs)
for ev in new_ev:
dataset = ev.evaluatesOn
model = ev.evaluates.rstrip('__' + dataset)
model = ev.evaluates
cached_evs[(model, dataset)] = ev
evaluations = []
print(tuples, 'Cached evs', cached_evs)
logger.debug('%s. Cached evs: %s', tuples, cached_evs)
for (p, d) in tuples:
print('Adding', d, p)
logger.debug('Adding %s, %s', d, p)
evaluations.append(cached_evs[(p.id, d)])
return evaluations
......@@ -868,7 +889,7 @@ def evaluations_to_JSONLD(results, flatten=False):
if row.get('CV', True):
evaluation['@type'] = ['StaticCV', 'Evaluation']
evaluation.evaluatesOn = row['Dataset']
evaluation.evaluates = row['Model']
evaluation.evaluates = row['Model'].rstrip('__' + row['Dataset'])
i = 0
if flatten:
metric = models.Metric()
......
#
# Copyright 2014 Grupo de Sistemas Inteligentes (GSI) DIT, UPM
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from senpy.plugins import Transformation
from senpy.models import Entry
from nltk.tokenize.punkt import PunktSentenceTokenizer
......
#
# Copyright 2014 Grupo de Sistemas Inteligentes (GSI) DIT, UPM
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from senpy.plugins import EmotionConversionPlugin
from senpy.models import EmotionSet, Emotion, Error
......@@ -85,7 +101,13 @@ class CentroidConversion(EmotionConversionPlugin):
def distance(centroid):
    """Add up ``distance_k(centroid, original, k)`` for every ``k`` in ``dimensions``.

    ``original``, ``dimensions`` and ``distance_k`` come from the enclosing
    scope.
    """
    total = 0
    for dim in dimensions:
        total += distance_k(centroid, original, dim)
    return total
emotion = min(centroids, key=lambda x: distance(centroids[x]))
distances = {k: distance(centroids[k]) for k in centroids}
logger.debug('Converting %s', original)
logger.debug('Centroids: %s', centroids)
logger.debug('Distances: %s', distances)
emotion = min(distances, key=lambda x: distances[x])
result = Emotion(onyx__hasEmotionCategory=emotion)
result.onyx__algorithmConfidence = distance(centroids[emotion])
......
......@@ -9,30 +9,30 @@ centroids:
anger:
A: 6.95
D: 5.1
V: 2.7
P: 2.7
disgust:
A: 5.3
D: 8.05
V: 2.7
P: 2.7
fear:
A: 6.5
D: 3.6
V: 3.2
P: 3.2
happiness:
A: 7.22
D: 6.28
V: 8.6
P: 8.6
sadness:
A: 5.21
D: 2.82
V: 2.21
P: 2.21
centroids_direction:
- emoml:big6
- emoml:pad
- emoml:pad-dimensions
aliases: # These are aliases for any key in the centroid, to avoid repeating a long name several times
A: emoml:pad-dimensions:arousal
V: emoml:pad-dimensions:valence
D: emoml:pad-dimensions:dominance
P: emoml:pad-dimensions_pleasure
A: emoml:pad-dimensions_arousal
D: emoml:pad-dimensions_dominance
anger: emoml:big6anger
disgust: emoml:big6disgust
fear: emoml:big6fear
......
#
# Copyright 2014 Grupo de Sistemas Inteligentes (GSI) DIT, UPM
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from senpy import PostProcessing, easy_test
......
#
# Copyright 2014 Grupo de Sistemas Inteligentes (GSI) DIT, UPM
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import requests
import json
......
......@@ -174,10 +174,18 @@ function add_plugin_pipeline(){
// Rebuild the dataset selector: iterate over the keys of `datasets` (defined
// outside this function), accumulate markup for each entry in `html`, and
// assign the result to the innerHTML of the element with id "datasets".
// NOTE(review): `html`, `repeated_html`, `dataset` and `ds` are assigned
// without var/let/const, so they become implicit globals in non-strict code.
function draw_datasets(){
html = "";
// Opening fragment of the legacy plain checkbox; the value attribute is closed below.
repeated_html = "<input class=\"checks-datasets\" type=\"checkbox\" value=\"";
for (dataset in datasets){
// NOTE(review): the two appends below and the template literal further down
// each add a checkbox for the same dataset. This looks like the pre-change and
// post-change lines of the commit shown together; confirm which should remain.
html += repeated_html+datasets[dataset]["@id"]+"\">"+datasets[dataset]["@id"];
html += "<br>"
ds = datasets[dataset]
// html += repeated_html+datasets[dataset]["@id"]+"\">"+datasets[dataset]["@id"];
// Checkbox wrapped in a span whose title attribute shows ds["stats"]["instances"].
// NOTE(review): the label's for="defaultCheck1" does not match any id set on
// the input here, and the interpolated values are inserted without escaping.
html += `
<span class="d-inline-block" tabindex="0" data-toggle="tooltip" title="Instances: ${ds["stats"]["instances"]}">
<div class="form-check form-check-inline">
<input class="form-check-input checks-datasets" type="checkbox" value="${ds["@id"]}">
<label class="form-check-label" for="defaultCheck1">${ds["@id"]}</label>
</div>
</span>
`
}
document.getElementById("datasets").innerHTML = html;
}
......
......@@ -233,28 +233,43 @@ In Data Science and Advanced Analytics (DSAA),
<div class="tab-pane" role="tabpanel" aria-labelledby="nav-evaluate" id="evaluate">
<div class="card my-2">
<div class="card-body">
<p>Automatically evaluate the classification performance of your plugin in several public datasets, and compare it with other plugins.</p>
<p>The datasets will be automatically downloaded if they are not already available locally. Depending on the size of the dataset and the speed of the plugin, the evaluation may take a long time.</p>
<form id="form" class="container" onsubmit="" accept-charset="utf-8">
<div>
<p>Automatically evaluate the classification performance of your plugin in several public datasets, and compare it with other plugins.</p>
<p>The datasets will be automatically downloaded if they are not already available locally. Depending on the size of the dataset and the speed of the plugin, the evaluation may take a long time.</p>
<label>Select the plugin:</label>
<select id="plugins-eval" name="plugins-eval" class=plugin onchange="draw_extra_parameters()">
</select>
<div class="card my-2">
<div class="card-header">
<h5>
Select the plugin.
</h5>
</div>
<div id="plugin_selection" class="card-body">
<select id="plugins-eval" name="plugins-eval" class=plugin onchange="draw_extra_parameters()">
</select>
</div>
</div>
<div>
<label>Select the datasets:</label>
<div id="datasets" name="datasets" >
</select>
<div class="card my-2">
<div class="card-header">
<h5>
Select the dataset.
</h5>
</div>
<div id="dataset_selection" class="card-body">
<div id="datasets" name="datasets" >
</div>
</div>
<button id="doevaluate" class="btn btn-lg btn-primary" onclick="evaluate_JSON()">Evaluate Plugin</button>
<!--<button id="visualise" name="type" type="button">Visualise!</button>-->
</div>
<!--<button id="visualise" name="type" type="button">Visualise!</button>-->
<button id="doevaluate" class="btn btn-lg btn-primary" onclick="evaluate_JSON()">Evaluate Plugin</button>
</form>
</div>
</div>
<div class="card my-2">
<div id="loading-results" class="loading"></div>
<span id="input_request_eval"></span>
<div id="input_request_eval"></div>
<div id="evaluate-div">
<ul class="nav nav-pills" role="tablist">
......@@ -273,23 +288,25 @@ In Data Science and Advanced Analytics (DSAA),
</div>
</div>
<div class="tab-pane" role="tabpanel" aria-labelledby="" id="evaluate-table">
<table id="eval_table" class="table table-condensed">
<thead>
<tr>
<th>Plugin</th>
<th>Dataset</th>
<th>Accuracy</th>
<th>Precision_macro</th>
<th>Recall_macro</th>
<th>F1_macro</th>
<th>F1_weighted</th>
<th>F1_micro</th>
<th>F1</th>
</tr>
</thead>
<tbody>
</tbody>
</table>
<div>
<table id="eval_table" class="table table-condensed">
<thead>
<tr>
<th>Plugin</th>
<th>Dataset</th>
<th>Accuracy</th>
<th>Precision_macro</th>
<th>Recall_macro</th>
<th>F1_macro</th>
<th>F1_weighted</th>
<th>F1_micro</th>
<th>F1</th>
</tr>
</thead>
<tbody>
</tbody>
</table>
</div>
</div>
</div>
</div>
......
#
# Copyright 2014 Grupo de Sistemas Inteligentes (GSI) DIT, UPM
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from past.builtins import basestring
import os
......
#
# Copyright 2014 Grupo de Sistemas Inteligentes (GSI) DIT, UPM