Commit 83e2d415 authored by militarpancho
Browse files

Change name to split, according to issue #37

parent f8ca595b
...@@ -3,12 +3,12 @@ from senpy.models import Entry ...@@ -3,12 +3,12 @@ from senpy.models import Entry
from nltk.tokenize.punkt import PunktSentenceTokenizer from nltk.tokenize.punkt import PunktSentenceTokenizer
from nltk.tokenize.simple import LineTokenizer from nltk.tokenize.simple import LineTokenizer
import nltk import nltk
class ChunkerPlugin(AnalysisPlugin): class SplitPlugin(AnalysisPlugin):
def activate(self): def activate(self):
nltk.download('punkt') nltk.download('punkt')
def analyse_entry(self, entry, params): def analyse_entry(self, entry, params):
chunker_type = params.get("type", "sentence") chunker_type = params.get("delimiter", "sentence")
original_id = entry.id original_id = entry.id
original_text = entry.get("text", None) original_text = entry.get("text", None)
if chunker_type == "sentence": if chunker_type == "sentence":
......
--- ---
name: chunker name: split
module: chunker module: split
description: A sample plugin that chunks input text description: A sample plugin that chunks input text
author: "@militarpancho" author: "@militarpancho"
version: '0.1' version: '0.1'
url: "https://github.com/gsi-upm/senpy" url: "https://github.com/gsi-upm/senpy"
requirements: {nltk} requirements: {nltk}
extra_params: extra_params:
type: delimiter:
aliases: aliases:
- type - type
- t - t
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment