diff --git a/crawl4ai/chunking_strategy.py b/crawl4ai/chunking_strategy.py index f46cb667c..2973ceab7 100644 --- a/crawl4ai/chunking_strategy.py +++ b/crawl4ai/chunking_strategy.py @@ -86,7 +86,7 @@ def chunk(self, text: str) -> list: sentences = sent_tokenize(text) sens = [sent.strip() for sent in sentences] - return list(set(sens)) + return list(dict.fromkeys(sens)) # Topic-based segmentation using TextTiling