Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
social
SoMeDi Use Case - Lateral
Commits
719a426e
Commit
719a426e
authored
Nov 16, 2017
by
Alberto Pascual
Browse files
language detector in analysis, SPARQL filter by language
parent
bd081244
Pipeline
#448
passed with stages
in 34 seconds
Changes
4
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
analysistask.py
View file @
719a426e
...
...
@@ -175,6 +175,11 @@ class FusekiTask(luigi.Task):
for
i
,
line
in
enumerate
(
infile
):
self
.
set_status_message
(
"Lines read: %d"
%
i
)
w
=
json
.
loads
(
line
)
#print(w)
try
:
w
[
"schema:headline"
]
=
{
"@value"
:
w
[
"schema:headline"
],
"@language"
:
w
[
"language_detected"
]}
except
:
w
[
"schema:articleBody"
]
=
{
"@value"
:
w
[
"schema:articleBody"
],
"@language"
:
w
[
"language_detected"
]}
f
.
append
(
w
)
f
=
json
.
dumps
(
f
)
self
.
set_status_message
(
"JSON created"
)
...
...
analyzers/__pycache__/analysis.cpython-36.pyc
View file @
719a426e
No preview for this file type
analyzers/analysis.py
View file @
719a426e
...
...
@@ -57,7 +57,7 @@ def semanticAnalysis(i):
for
x
,
index
in
enumerate
(
r
[
"entries"
][
0
][
"entities"
]):
index
[
"nif:beginIndex"
]
=
str
(
int
(
index
[
"nif:beginIndex"
])
+
(
REQUEST_LONG
*
k
))
index
[
"nif:endIndex"
]
=
str
(
int
(
index
[
"nif:endIndex"
])
+
(
REQUEST_LONG
*
k
))
if
index
[
"@type"
]
.
split
(
'#'
)[
-
1
]
==
'ODENTITY_City'
:
if
index
[
"@type"
]
==
'ODENTITY_City'
:
try
:
geor
=
requests
.
get
(
"https://dbpedia.org/sparql?default-graph-uri=http%3A%2F%2Fdbpedia.org&query=select+%3Fcoordinates+where+%7B%0D%0A%0D%0Adbr%3A{}+georss%3Apoint+%3Fcoordinates%0D%0A%0D%0A%7D&format=application%2Fsparql-results%2Bjson"
.
format
(
index
.
get
(
"rdfs:subClassOf"
,
""
).
split
(
'/'
)[
-
1
]))
coords
=
geor
.
json
()[
'results'
][
'bindings'
][
0
][
'coordinates'
][
'value'
].
split
()
...
...
@@ -65,7 +65,7 @@ def semanticAnalysis(i):
index
[
'longitude'
]
=
coords
[
1
]
except
(
IndexError
,
json
.
decoder
.
JSONDecodeError
):
pass
if
index
[
"@type"
]
.
split
(
'#'
)[
-
1
]
in
[
'ODENTITY_Person'
,
'ODENTITY_FullName'
]:
if
index
[
"@type"
]
in
[
'ODENTITY_Person'
,
'ODENTITY_FullName'
]:
try
:
peopler
=
requests
.
get
(
"https://dbpedia.org/sparql?default-graph-uri=http%3A%2F%2Fdbpedia.org&query=select+%3Fimage+where+%7B%0D%0A++dbr%3A{}+++dbo%3Athumbnail+%3Fimage%0D%0A%0D%0A%7D+LIMIT+100&format=application%2Fsparql-results%2Bjson&CXML_redir_for_subjs=121&CXML_redir_for_hrefs=&timeout=30000&debug=on&run=+Run+Query+"
.
format
(
index
.
get
(
"rdfs:subClassOf"
,
""
).
split
(
'/'
)[
-
1
]))
index
[
'dbo:thumbnail'
]
=
peopler
.
json
()[
'results'
][
'bindings'
][
0
][
'image'
][
'value'
]
...
...
@@ -95,4 +95,8 @@ def semanticAnalysis(i):
i
[
"sentiments"
]
=
sentiments_arr
i
[
"entities"
]
=
entities_arr
i
[
"topics"
]
=
topics_arr
try
:
i
[
"language_detected"
]
=
r
[
"entries"
][
0
][
"language_detected"
]
except
:
i
[
"language_detected"
]
=
"en"
return
i
luigi-task-hist.db
View file @
719a426e
No preview for this file type
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment