Commit c0aa7ddc authored Apr 24, 2018 by J. Fernando Sánchez
Add evaluation tests

parent 5e2ada16

Changes 7
example-plugins/basic_box_plugin.py

@@ -18,7 +18,7 @@ class BasicBox(SentimentBox):
         'default': 'marl:Neutral'
     }
 
-    def predict(self, input):
+    def predict_one(self, input):
         output = basic.get_polarity(input)
         return self.mappings.get(output, self.mappings['default'])
example-plugins/basic_plugin.py

@@ -18,7 +18,7 @@ class Basic(MappingMixin, SentimentBox):
         'default': 'marl:Neutral'
     }
 
-    def predict(self, input):
+    def predict_one(self, input):
         return basic.get_polarity(input)
 
     test_cases = [{
example-plugins/sklearn/pipeline_plugin.py

@@ -18,7 +18,7 @@ class PipelineSentiment(MappingMixin, SentimentBox):
         -1: 'marl:Negative'
     }
 
-    def predict(self, input):
+    def predict_one(self, input):
         return pipeline.predict([input, ])[0]
 
     test_cases = [
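Across the three example plugins the change is the same: the per-entry hook is renamed from predict to predict_one, matching the Box API change further down. The following is a minimal, hypothetical sketch of a plugin written against the renamed hook; the class name, its mappings and the inline polarity rule are illustrative only and stand in for the basic helper module used by the examples above.

    # Hedged sketch, not part of this commit: a toy plugin using the renamed hook.
    from senpy import plugins


    class ToyBasic(plugins.MappingMixin, plugins.SentimentBox):
        description = 'Illustrative plugin for the predict_one contract'
        version = 0

        mappings = {'pos': 'marl:Positive',
                    'neg': 'marl:Negative',
                    'default': 'marl:Neutral'}

        def predict_one(self, input):   # receives one query at a time, not a batch
            return 'pos' if ':)' in input else 'neg'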
senpy/extensions.py

@@ -6,7 +6,7 @@ from future import standard_library
 standard_library.install_aliases()
 
 from . import plugins, api
-from .plugins import Plugin
+from .plugins import Plugin, evaluate
 from .models import Error, AggregatedEvaluation
 from .blueprints import api_blueprint, demo_blueprint, ns_blueprint
@@ -17,7 +17,6 @@ import copy
 import errno
 import logging
 
-#Correct this import for managing the datasets
 from gsitk.datasets.datasets import DatasetManager
@@ -197,13 +196,13 @@ class Senpy(object):
         if dataset not in self.datasets:
             logger.debug(("The dataset '{}' is not valid\n"
                           "Valid datasets: {}").format(dataset,
-                          self.datasets.keys()))
+                                                       self.datasets.keys()))
             raise Error(
                 status=404,
                 message="The dataset '{}' is not valid".format(dataset))
         datasets = self._dm.prepare_datasets(datasets_name)
         return datasets
 
     @property
     def datasets(self):
         self._dataset_list = {}
@@ -219,29 +218,17 @@ class Senpy(object):
     def evaluate(self, params):
         logger.debug("evaluating request: {}".format(params))
-        try:
-            results = AggregatedEvaluation()
-            results.parameters = params
-            datasets = self._get_datasets(results)
-            plugins = self._get_plugins(results)
-            collector = list()
-            for plugin in plugins:
-                for eval in plugin.score(datasets):
-                    results.evaluations.append(eval)
-            if 'with_parameters' not in results.parameters:
-                del results.parameters
-            logger.debug("Returning evaluation result: {}".format(results))
-        except (Error, Exception) as ex:
-            if not isinstance(ex, Error):
-                msg = "Error during evaluation: {}\n\t{}".format(ex,
-                                                                 traceback.format_exc())
-                ex = Error(message=msg, status=500)
-            logger.exception('Error returning evaluation result')
-            raise ex
-        #results.evaluations = collector
+        results = AggregatedEvaluation()
+        results.parameters = params
+        datasets = self._get_datasets(results)
+        plugins = self._get_plugins(results)
+        for eval in evaluate(plugins, datasets):
+            results.evaluations.append(eval)
+        if 'with_parameters' not in results.parameters:
+            del results.parameters
+        logger.debug("Returning evaluation result: {}".format(results))
         return results
 
     def _conversion_candidates(self, fromModel, toModel):
         candidates = self.plugins(plugin_type='emotionConversionPlugin')
         for candidate in candidates:
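The net effect in extensions.py: Senpy.evaluate no longer drives each plugin's score() method or wraps everything in a try/except; it delegates to the module-level evaluate() helper added to senpy/plugins/__init__.py below. A rough, hedged sketch of the resulting call chain, using only names that appear in this commit (indentation only indicates who calls whom):

    # Hedged sketch of the new call chain introduced by this commit.
    #
    # Senpy.evaluate(params)                        # senpy/extensions.py
    #   -> self._get_datasets(results)
    #   -> self._get_plugins(results)
    #   -> evaluate(plugins, datasets)              # imported from senpy.plugins
    #        -> Eval(tuples=None, datasets=datasets,
    #                pipelines=[plugin.as_pipe() for plugin in plugins])
    #        -> evaluations_to_JSONLD(ev.results)   # builds Evaluation/Metric models
    #   -> results.evaluations.append(eval)         # collected into AggregatedEvaluation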
senpy/plugins/__init__.py

@@ -25,6 +25,8 @@ from .. import api
 from gsitk.evaluation.evaluation import Evaluation as Eval
+from sklearn.pipeline import Pipeline
+import numpy as np
 
 logger = logging.getLogger(__name__)
@@ -254,7 +256,7 @@ class Box(AnalysisPlugin):
     .. code-block::
 
-        entry --> input() --> predict() --> output() --> entry'
+        entry --> input() --> predict_one() --> output() --> entry'
 
     In other words: their ``input`` method convers a query (entry and a set of parameters) into
@@ -270,15 +272,33 @@ class Box(AnalysisPlugin):
         '''Transforms the results of the black box into an entry'''
         return output
 
-    def predict(self, input):
+    def predict_one(self, input):
         raise NotImplementedError('You should define the behavior of this plugin')
 
     def analyse_entries(self, entries, params):
         for entry in entries:
             input = self.input(entry=entry, params=params)
-            results = self.predict(input=input)
+            results = self.predict_one(input=input)
             yield self.output(output=results, entry=entry, params=params)
 
+    def fit(self, X=None, y=None):
+        return self
+
+    def transform(self, X):
+        return np.array([self.predict_one(x) for x in X])
+
+    def predict(self, X):
+        return self.transform(X)
+
+    def fit_transform(self, X, y):
+        self.fit(X, y)
+        return self.transform(X)
+
+    def as_pipe(self):
+        pipe = Pipeline([('plugin', self)])
+        pipe.name = self.name
+        return pipe
+
 
 class TextBox(Box):
     '''A black box plugin that takes only text as input'''
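The new fit/transform/predict/fit_transform methods give every Box the minimal scikit-learn estimator surface, and as_pipe() wraps the plugin in a one-step sklearn Pipeline so that gsitk can evaluate it like any other model. Below is a hedged usage sketch; ToyPlugin, its attributes and the sample inputs are hypothetical, modelled on the test plugins added in tests/test_plugins.py further down.

    # Hedged usage sketch, not part of the commit; ToyPlugin is hypothetical.
    import numpy as np
    from senpy import plugins


    class ToyPlugin(plugins.TextBox):
        description = 'Illustrative plugin for the scikit-learn shim'
        version = 0

        def predict_one(self, input):
            return 1 if 'good' in input else 0


    pipe = ToyPlugin().as_pipe()     # one-step sklearn Pipeline, named after the plugin
    out = pipe.predict(['good service', 'awful'])
    # Pipeline.predict() ends up in Box.predict(), which calls transform(), which
    # maps predict_one over the inputs and returns a numpy array: array([1, 0])
    assert isinstance(out, np.ndarray)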
@@ -323,48 +343,6 @@ class EmotionBox(TextBox, EmotionPlugin):
         return entry
 
 
-class EvaluationBox():
-    '''
-    A box plugin where it is implemented the evaluation. It is necessary to have a pipeline.
-    '''
-
-    def score(self, datasets):
-        pipelines = [self._pipeline]
-        ev = Eval(tuples=None, datasets=datasets, pipelines=pipelines)
-        ev.evaluate()
-        results = ev.results
-        evaluations = self._evaluations_toJSONLD(results)
-        return evaluations
-
-    def _evaluations_toJSONLD(self, results):
-        '''
-        Map the evaluation results to a JSONLD scheme
-        '''
-        evaluations = list()
-        metric_names = ['accuracy', 'precision_macro', 'recall_macro',
-                        'f1_macro', 'f1_weighted', 'f1_micro', 'f1_macro']
-        for index, row in results.iterrows():
-            evaluation = models.Evaluation()
-            if row['CV'] == False:
-                evaluation['@type'] = ['StaticCV', 'Evaluation']
-            evaluation.evaluatesOn = row['Dataset']
-            evaluation.evaluates = row['Model']
-            i = 0
-            for name in metric_names:
-                metric = models.Metric()
-                metric['@id'] = 'Metric' + str(i)
-                metric['@type'] = name.capitalize()
-                metric.value = row[name]
-                evaluation.metrics.append(metric)
-                i += 1
-            evaluations.append(evaluation)
-        return evaluations
-
-
 class MappingMixin(object):
 
     @property
@@ -605,3 +583,47 @@ def _from_loaded_module(module, info=None, **kwargs):
         yield cls(info=info, **kwargs)
     for instance in _instances_in_module(module):
         yield instance
+
+
+def evaluate(plugins, datasets, **kwargs):
+    ev = Eval(tuples=None,
+              datasets=datasets,
+              pipelines=[plugin.as_pipe() for plugin in plugins])
+    ev.evaluate()
+    results = ev.results
+    evaluations = evaluations_to_JSONLD(results, **kwargs)
+    return evaluations
+
+
+def evaluations_to_JSONLD(results, flatten=False):
+    '''
+    Map the evaluation results to a JSONLD scheme
+    '''
+
+    evaluations = list()
+    metric_names = ['accuracy', 'precision_macro', 'recall_macro',
+                    'f1_macro', 'f1_weighted', 'f1_micro', 'f1_macro']
+
+    for index, row in results.iterrows():
+        evaluation = models.Evaluation()
+        if row.get('CV', True):
+            evaluation['@type'] = ['StaticCV', 'Evaluation']
+        evaluation.evaluatesOn = row['Dataset']
+        evaluation.evaluates = row['Model']
+        i = 0
+        if flatten:
+            metric = models.Metric()
+            for name in metric_names:
+                metric[name] = row[name]
+            evaluation.metrics.append(metric)
+        else:
+            # We should probably discontinue this representation
+            for name in metric_names:
+                metric = models.Metric()
+                metric['@id'] = 'Metric' + str(i)
+                metric['@type'] = name.capitalize()
+                metric.value = row[name]
+                evaluation.metrics.append(metric)
+                i += 1
+        evaluations.append(evaluation)
+    return evaluations
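evaluate() builds one sklearn pipeline per plugin via as_pipe() and hands them to gsitk's Eval; evaluations_to_JSONLD() then turns the resulting pandas DataFrame (one row per plugin/dataset pair) into senpy Evaluation and Metric models. The sketch below illustrates the flatten switch with a hand-built stand-in for that DataFrame; the column names follow the code above, but the scores and the model/dataset names are made up.

    # Hedged sketch, not part of the commit: illustrating flatten= with fake scores.
    import pandas as pd
    from senpy.plugins import evaluations_to_JSONLD

    # Stand-in for gsitk's Eval.results frame; one row per (plugin, dataset) pair.
    results = pd.DataFrame([{
        'Dataset': 'testdata', 'Model': 'toyplugin', 'CV': False,
        'accuracy': 0.5, 'precision_macro': 0.25, 'recall_macro': 0.5,
        'f1_macro': 0.33, 'f1_weighted': 0.33, 'f1_micro': 0.5,
    }])

    flat = evaluations_to_JSONLD(results, flatten=True)
    # -> one Evaluation whose single Metric maps every metric name to its score

    nested = evaluations_to_JSONLD(results)
    # -> one Evaluation with a separate Metric per score ('Metric0', 'Metric1', ...)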
senpy/schemas/definitions.json

@@ -43,7 +43,7 @@
         "$ref": "response.json"
     },
     "AggregatedEvaluation": {
-        "$ref": "aggregatedevaluation.json"
+        "$ref": "aggregatedEvaluation.json"
     },
     "Evaluation": {
         "$ref": "evaluation.json"
tests/test_plugins.py

@@ -10,6 +10,8 @@ from senpy.models import Results, Entry, EmotionSet, Emotion, Plugins
 from senpy import plugins
 from senpy.plugins.conversion.emotion.centroids import CentroidConversion
 
+import pandas as pd
+
 
 class ShelfDummyPlugin(plugins.SentimentPlugin, plugins.ShelfMixin):
     '''Dummy plugin for tests.'''
@@ -212,7 +214,7 @@ class PluginsTest(TestCase):
             def input(self, entry, **kwargs):
                 return entry.text
 
-            def predict(self, input):
+            def predict_one(self, input):
                 return 'SIGN' in input
 
             def output(self, output, entry, **kwargs):
@@ -242,7 +244,7 @@ class PluginsTest(TestCase):
             mappings = {'happy': 'marl:Positive', 'sad': 'marl:Negative'}
 
-            def predict(self, input, **kwargs):
+            def predict_one(self, input, **kwargs):
                 return 'happy' if ':)' in input else 'sad'
 
             test_cases = [
@@ -309,6 +311,40 @@ class PluginsTest(TestCase):
         res = c._backwards_conversion(e)
         assert res["onyx:hasEmotionCategory"] == "c2"
 
+    def test_evaluation(self):
+        testdata = []
+        for i in range(50):
+            testdata.append(["good", 1])
+        for i in range(50):
+            testdata.append(["bad", 0])
+        dataset = pd.DataFrame(testdata, columns=['text', 'polarity'])
+
+        class DummyPlugin(plugins.TextBox):
+            description = 'Plugin to test evaluation'
+            version = 0
+
+            def predict_one(self, input):
+                return 0
+
+        class SmartPlugin(plugins.TextBox):
+            description = 'Plugin to test evaluation'
+            version = 0
+
+            def predict_one(self, input):
+                if input == 'good':
+                    return 1
+                return 0
+
+        dpipe = DummyPlugin()
+        results = plugins.evaluate(datasets={'testdata': dataset},
+                                   plugins=[dpipe],
+                                   flatten=True)
+        dumb_metrics = results[0].metrics[0]
+        assert abs(dumb_metrics['accuracy'] - 0.5) < 0.01
+
+        spipe = SmartPlugin()
+        results = plugins.evaluate(datasets={'testdata': dataset},
+                                   plugins=[spipe],
+                                   flatten=True)
+        smart_metrics = results[0].metrics[0]
+        assert abs(smart_metrics['accuracy'] - 1) < 0.01
+
 
 def make_mini_test(fpath):
     def mini_test(self):