mirror of
https://github.com/unclecode/crawl4ai.git
synced 2026-06-11 00:08:01 +00:00
Merge pull request #1913 from unclecode/fix/nlp-sentence-chunking-1909
fix(chunking): preserve sentence order in NlpSentenceChunking
This commit is contained in:
@@ -71,7 +71,6 @@ class NlpSentenceChunking(ChunkingStrategy):
|
||||
"""
|
||||
Initialize the NlpSentenceChunking object.
|
||||
"""
|
||||
from crawl4ai.le.legacy.model_loader import load_nltk_punkt
|
||||
load_nltk_punkt()
|
||||
|
||||
def chunk(self, text: str) -> list:
|
||||
@@ -86,7 +85,7 @@ class NlpSentenceChunking(ChunkingStrategy):
|
||||
sentences = sent_tokenize(text)
|
||||
sens = [sent.strip() for sent in sentences]
|
||||
|
||||
- return list(set(sens))
|
||||
+ return sens
|
||||
|
||||
|
||||
# Topic-based segmentation using TextTiling
|
||||
|
||||
Reference in New Issue
Block a user