Merge pull request #1913 from unclecode/fix/nlp-sentence-chunking-1909

fix(chunking): preserve sentence order in NlpSentenceChunking
This commit is contained in:
Nasrin
2026-04-16 10:24:59 +02:00
committed by GitHub

View File

@@ -71,7 +71,6 @@ class NlpSentenceChunking(ChunkingStrategy):
"""
Initialize the NlpSentenceChunking object.
"""
from crawl4ai.le.legacy.model_loader import load_nltk_punkt
load_nltk_punkt()
def chunk(self, text: str) -> list:
@@ -86,7 +85,7 @@ class NlpSentenceChunking(ChunkingStrategy):
sentences = sent_tokenize(text)
sens = [sent.strip() for sent in sentences]
-return list(set(sens))
+return sens
# Topic-based segmentation using TextTiling