fix: preserve rowspan/colspan in cleaned_html (#1920)

Add rowspan and colspan to IMPORTANT_ATTRS so they survive
attribute stripping in remove_unwanted_attributes_fast().
This commit is contained in:
ntohidi
2026-04-16 12:42:36 +08:00
parent 3d02d75edb
commit 7bfc547bce

View File

@@ -47,7 +47,7 @@ WORD_TOKEN_RATE = 1.3
MIN_WORD_THRESHOLD = 1
IMAGE_DESCRIPTION_MIN_WORD_THRESHOLD = 1
-IMPORTANT_ATTRS = ["src", "href", "alt", "title", "width", "height", "class", "id"]
+IMPORTANT_ATTRS = ["src", "href", "alt", "title", "width", "height", "class", "id", "rowspan", "colspan"]
ONLY_TEXT_ELIGIBLE_TAGS = [
"b",
"i",