Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
senpy
senpy
Commits
55be0e57
Commit 55be0e57 authored Dec 12, 2017 by militarpancho
Browse files
Fix #48
parent
694201d8
Changes
1
Hide whitespace changes
Inline
Side-by-side
senpy/plugins/misc/split.py
View file @
55be0e57
...
...
@@ -12,13 +12,14 @@ class SplitPlugin(AnalysisPlugin):
     def analyse_entry(self, entry, params):
         chunker_type = params.get("delimiter", "sentence")
-        original_text = entry.get('nif:isString', None)
+        original_text = entry['nif:isString']
         if chunker_type == "sentence":
             tokenizer = PunktSentenceTokenizer()
         if chunker_type == "paragraph":
             tokenizer = LineTokenizer()
-        chars = tokenizer.span_tokenize(original_text)
+        chars = list(tokenizer.span_tokenize(original_text))
         for i, chunk in enumerate(tokenizer.tokenize(original_text)):
+            print(chunk)
             e = Entry()
             e['nif:isString'] = chunk
             if entry.id:
...
...
@@ -45,19 +46,19 @@ class SplitPlugin(AnalysisPlugin):
         {
             'entry': {
                 "id": ":test",
-                'nif:isString': 'Hello. World.'
+                'nif:isString': 'Hello\nWorld'
             },
             'params': {
-                'delimiter': 'sentence',
+                'delimiter': 'paragraph',
             },
             'expected': [
                 {
-                    "@id": ":test#char=0,6",
-                    'nif:isString': 'Hello.'
+                    "@id": ":test#char=0,5",
+                    'nif:isString': 'Hello'
                 },
                 {
-                    "@id": ":test#char=7,13",
-                    'nif:isString': 'World.'
+                    "@id": ":test#char=6,11",
+                    'nif:isString': 'World'
                 }
             ]
         }
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment.