mirror of
https://github.com/unclecode/crawl4ai.git
synced 2026-06-11 16:28:11 +00:00
fix: preserve rowspan/colspan in cleaned_html (#1920)
Add rowspan and colspan to IMPORTANT_ATTRS so they survive attribute stripping in remove_unwanted_attributes_fast().
This commit is contained in:
@@ -47,7 +47,7 @@ WORD_TOKEN_RATE = 1.3
|
||||
MIN_WORD_THRESHOLD = 1
|
||||
IMAGE_DESCRIPTION_MIN_WORD_THRESHOLD = 1
|
||||
|
||||
-IMPORTANT_ATTRS = ["src", "href", "alt", "title", "width", "height", "class", "id"]
|
||||
+IMPORTANT_ATTRS = ["src", "href", "alt", "title", "width", "height", "class", "id", "rowspan", "colspan"]
|
||||
ONLY_TEXT_ELIGIBLE_TAGS = [
|
||||
"b",
|
||||
"i",
|
||||
|
||||
Reference in New Issue
Block a user