From 7bfc547bceb3133f9e5f855dfa2f5a1d8fd9be81 Mon Sep 17 00:00:00 2001 From: ntohidi Date: Thu, 16 Apr 2026 12:42:36 +0800 Subject: [PATCH] fix: preserve rowspan/colspan in cleaned_html (#1920) Add rowspan and colspan to IMPORTANT_ATTRS so they survive attribute stripping in remove_unwanted_attributes_fast(). --- crawl4ai/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crawl4ai/config.py b/crawl4ai/config.py index 9cd02f97..507965af 100644 --- a/crawl4ai/config.py +++ b/crawl4ai/config.py @@ -47,7 +47,7 @@ WORD_TOKEN_RATE = 1.3 MIN_WORD_THRESHOLD = 1 IMAGE_DESCRIPTION_MIN_WORD_THRESHOLD = 1 -IMPORTANT_ATTRS = ["src", "href", "alt", "title", "width", "height", "class", "id"] +IMPORTANT_ATTRS = ["src", "href", "alt", "title", "width", "height", "class", "id", "rowspan", "colspan"] ONLY_TEXT_ELIGIBLE_TAGS = [ "b", "i",