mirror of
https://github.com/unclecode/crawl4ai.git
synced 2026-06-11 00:08:01 +00:00
Merge PR #1788: fix: guard against None LLM content and propagate finish_reason
Adds a None check before processing the LLM response content in both extract() and aextract(). When the LLM returns no content (e.g. because of a content filter or token limit), both methods now return an error block that includes the finish_reason instead of crashing. Also guards the except fallback path against None content.
This commit is contained in:
@@ -714,7 +714,11 @@ class LLMExtractionStrategy(ExtractionStrategy):
|
||||
content = response.choices[0].message.content
|
||||
blocks = None
|
||||
|
||||
if self.force_json_response:
|
||||
if not content:
|
||||
finish_reason = getattr(response.choices[0], "finish_reason", "unknown")
|
||||
blocks = [{"index": 0, "error": True, "tags": ["error"],
|
||||
"content": f"LLM returned no content (finish_reason: {finish_reason})"}]
|
||||
elif self.force_json_response:
|
||||
blocks = json.loads(_strip_markdown_fences(content))
|
||||
if isinstance(blocks, dict):
|
||||
# If it has only one key which calue is list then assign that to blocks, exampled: {"news": [..]}
|
||||
@@ -734,9 +738,8 @@ class LLMExtractionStrategy(ExtractionStrategy):
|
||||
for block in blocks:
|
||||
block["error"] = False
|
||||
except Exception:
|
||||
parsed, unparsed = split_and_parse_json_objects(
|
||||
response.choices[0].message.content
|
||||
)
|
||||
raw_content = response.choices[0].message.content or ""
|
||||
parsed, unparsed = split_and_parse_json_objects(raw_content)
|
||||
blocks = parsed
|
||||
if unparsed:
|
||||
blocks.append(
|
||||
@@ -914,7 +917,11 @@ class LLMExtractionStrategy(ExtractionStrategy):
|
||||
content = response.choices[0].message.content
|
||||
blocks = None
|
||||
|
||||
if self.force_json_response:
|
||||
if not content:
|
||||
finish_reason = getattr(response.choices[0], "finish_reason", "unknown")
|
||||
blocks = [{"index": 0, "error": True, "tags": ["error"],
|
||||
"content": f"LLM returned no content (finish_reason: {finish_reason})"}]
|
||||
elif self.force_json_response:
|
||||
blocks = json.loads(_strip_markdown_fences(content))
|
||||
if isinstance(blocks, dict):
|
||||
if len(blocks) == 1 and isinstance(list(blocks.values())[0], list):
|
||||
@@ -930,9 +937,8 @@ class LLMExtractionStrategy(ExtractionStrategy):
|
||||
for block in blocks:
|
||||
block["error"] = False
|
||||
except Exception:
|
||||
parsed, unparsed = split_and_parse_json_objects(
|
||||
response.choices[0].message.content
|
||||
)
|
||||
raw_content = response.choices[0].message.content or ""
|
||||
parsed, unparsed = split_and_parse_json_objects(raw_content)
|
||||
blocks = parsed
|
||||
if unparsed:
|
||||
blocks.append(
|
||||
|
||||
Reference in New Issue
Block a user