diff --git a/crawl4ai/adaptive_crawler.py b/crawl4ai/adaptive_crawler.py
index 5611d820..6aa1d3c2 100644
--- a/crawl4ai/adaptive_crawler.py
+++ b/crawl4ai/adaptive_crawler.py
@@ -1842,7 +1842,7 @@ class AdaptiveCrawler:
         
         return export_dict
     
-    def import_knowledge_base(self, filepath: Union[str, Path], format: str = "jsonl") -> None:
+    async def import_knowledge_base(self, filepath: Union[str, Path], format: str = "jsonl") -> None:
         """Import a knowledge base from a file
         
         Args:
@@ -1871,7 +1871,7 @@ class AdaptiveCrawler:
             self.state.knowledge_base.extend(imported_results)
             
             # Update state with imported data
-            asyncio.run(self.strategy.update_state(self.state, imported_results))
+            await self.strategy.update_state(self.state, imported_results)
             
             print(f"Imported {len(imported_results)} documents from {filepath}")
         else:
diff --git a/crawl4ai/async_crawler_strategy.py b/crawl4ai/async_crawler_strategy.py
index dcc7130c..5ec368f1 100644
--- a/crawl4ai/async_crawler_strategy.py
+++ b/crawl4ai/async_crawler_strategy.py
@@ -1882,7 +1882,7 @@ class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy):
 
             buffered = BytesIO()
             stitched = stitched.convert("RGB")
-            stitched.save(buffered, format="BMP", quality=85)
+            stitched.save(buffered, format="PNG")
             encoded = base64.b64encode(buffered.getvalue()).decode("utf-8")
 
             return encoded
diff --git a/crawl4ai/extraction_strategy.py b/crawl4ai/extraction_strategy.py
index c50916f1..a3156016 100644
--- a/crawl4ai/extraction_strategy.py
+++ b/crawl4ai/extraction_strategy.py
@@ -296,7 +296,7 @@ class CosineStrategy(ExtractionStrategy):
             return documents
 
         if len(documents) < at_least_k:
-            at_least_k = len(documents) // 2
+            at_least_k = max(1, len(documents) // 2)
 
         from sklearn.metrics.pairwise import cosine_similarity
 
@@ -451,7 +451,10 @@ class CosineStrategy(ExtractionStrategy):
         """
         # Assume `html` is a list of text chunks for this strategy
         t = time.time()
-        text_chunks = html.split(self.DEL)  # Split by lines or paragraphs as needed
+        # Split by delimiter; fall back to double-newline splitting for raw text
+        text_chunks = html.split(self.DEL)
+        if len(text_chunks) == 1:
+            text_chunks = [chunk.strip() for chunk in html.split("\n\n") if chunk.strip()]
 
         # Pre-filter documents using embeddings and semantic_filter
         text_chunks = self.filter_documents_embeddings(
diff --git a/crawl4ai/processors/pdf/__init__.py b/crawl4ai/processors/pdf/__init__.py
index a6627f13..69a6f75a 100644
--- a/crawl4ai/processors/pdf/__init__.py
+++ b/crawl4ai/processors/pdf/__init__.py
@@ -145,6 +145,7 @@ class PDFContentScrapingStrategy(ContentScrapingStrategy):
             
             # Create temp file with .pdf extension
             temp_file = tempfile.NamedTemporaryFile(suffix='.pdf', delete=False)
+            temp_file.close()  # Close handle immediately; file persists due to delete=False
             self._temp_files.append(temp_file.name)
             
             try:
diff --git a/crawl4ai/utils.py b/crawl4ai/utils.py
index ec68c47b..4b3d9690 100644
--- a/crawl4ai/utils.py
+++ b/crawl4ai/utils.py
@@ -1697,7 +1697,7 @@ def extract_xml_data_legacy(tags, string):
     data = {}
 
     for tag in tags:
-        pattern = f"<{tag}>(.*?)</{tag}>"
+        pattern = f"<{tag}>((?:(?!<{tag}>).)*)</{tag}>"
         match = re.search(pattern, string, re.DOTALL)
         if match:
             data[tag] = match.group(1).strip()
@@ -1726,7 +1726,7 @@ def extract_xml_data(tags, string):
     data = {}
 
     for tag in tags:
-        pattern = f"<{tag}>(.*?)</{tag}>"
+        pattern = f"<{tag}>((?:(?!<{tag}>).)*)</{tag}>"
         matches = re.findall(pattern, string, re.DOTALL)
         
         if matches:
@@ -2294,14 +2294,14 @@ def normalize_url(
     # IMPORTANT: Don't use quote(unquote()) as it mangles + signs in URLs
     # The path from urlparse is already properly encoded
     path = parsed.path
-    if path.endswith('/') and path != '/':
-        path = path.rstrip('/')
+    # Preserve trailing slashes -- they are semantically significant per RFC 3986
+    # e.g. /page/9123/ and /page/9123 may return different responses
 
     # ── query ──
     query = parsed.query
     if query:
         # explode, mutate, then rebuild
-        params = [(k.lower(), v) for k, v in parse_qsl(query, keep_blank_values=True)]
+        params = [(k, v) for k, v in parse_qsl(query, keep_blank_values=True)]
 
         if drop_query_tracking:
             default_tracking = {
@@ -2310,7 +2310,7 @@ def normalize_url(
             }
             if extra_drop_params:
                 default_tracking |= {p.lower() for p in extra_drop_params}
-            params = [(k, v) for k, v in params if k not in default_tracking]
+            params = [(k, v) for k, v in params if k.lower() not in default_tracking]
 
         if sort_query:
             params.sort(key=lambda kv: kv[0])
@@ -2383,7 +2383,7 @@ def normalize_url_for_deep_crawl(href, base_url, preserve_https=False, original_
     normalized = urlunparse((
         parsed.scheme,
         netloc,
-        parsed.path.rstrip('/'),  # Normalize trailing slash
+        parsed.path or '/',  # Preserve trailing slash
         parsed.params,
         query,
         fragment
@@ -2422,7 +2422,7 @@ def efficient_normalize_url_for_deep_crawl(href, base_url, preserve_https=False,
     normalized = urlunparse((
         parsed.scheme,
         parsed.netloc.lower(),
-        parsed.path.rstrip('/'),
+        parsed.path or '/',  # Preserve trailing slash
         parsed.params,
         parsed.query,
         ''  # Remove fragment
diff --git a/deploy/docker/c4ai-doc-context.md b/deploy/docker/c4ai-doc-context.md
index abfd3637..0120e1b6 100644
--- a/deploy/docker/c4ai-doc-context.md
+++ b/deploy/docker/c4ai-doc-context.md
@@ -8589,7 +8589,7 @@ Real sites often have **nested** or repeated data—like categories containing p
 
 We have a **sample e-commerce** HTML file on GitHub (example):
 ```
-https://gist.githubusercontent.com/githubusercontent/2d7b8ba3cd8ab6cf3c8da771ddb36878/raw/1ae2f90c6861ce7dd84cc50d3df9920dee5e1fd2/sample_ecommerce.html
+https://raw.githubusercontent.com/unclecode/crawl4ai/main/docs/examples/sample_ecommerce.html
 ```
 This snippet includes categories, products, features, reviews, and related items. Let’s see how to define a schema that fully captures that structure **without LLM**.
 
@@ -8721,7 +8721,7 @@ async def extract_ecommerce_data():
     
     async with AsyncWebCrawler(verbose=True) as crawler:
         result = await crawler.arun(
-            url="https://gist.githubusercontent.com/githubusercontent/2d7b8ba3cd8ab6cf3c8da771ddb36878/raw/1ae2f90c6861ce7dd84cc50d3df9920dee5e1fd2/sample_ecommerce.html",
+            url="https://raw.githubusercontent.com/unclecode/crawl4ai/main/docs/examples/sample_ecommerce.html",
             extraction_strategy=strategy,
             config=config
         )
diff --git a/deploy/docker/static/playground/index.html b/deploy/docker/static/playground/index.html
index 510a6620..e8c55037 100644
--- a/deploy/docker/static/playground/index.html
+++ b/deploy/docker/static/playground/index.html
@@ -128,6 +128,10 @@
             opacity: 1;
         }
 
+        #adv-editor .CodeMirror {
+            height: 100% !important;
+        }
+
         /* copid text highlighted */
         .highlighted {
             background-color: rgba(78, 255, 255, 0.2) !important;
@@ -267,7 +271,7 @@
                     </div>
 
                     <!-- CodeMirror host -->
-                    <div id="adv-editor" class="mt-2 border border-border rounded overflow-hidden h-40"></div>
+                    <div id="adv-editor" class="mt-2 border border-border rounded overflow-auto" style="height: 160px; min-height: 160px; max-height: 500px; resize: vertical;"></div>
                 </details>
 
                 <div class="flex space-x-2">
diff --git a/deploy/docker/utils.py b/deploy/docker/utils.py
index de44852b..585d8941 100644
--- a/deploy/docker/utils.py
+++ b/deploy/docker/utils.py
@@ -19,11 +19,113 @@ class FilterType(str, Enum):
     BM25 = "bm25"
     LLM = "llm"
 
+DEFAULT_CONFIG = {  # Fallback settings; load_config() deep-merges config.yml on top of these
+    "app": {
+        "title": "Crawl4AI API",
+        "version": "1.0.0",
+        "host": "0.0.0.0",
+        "port": 11235,
+        "reload": False,
+        "workers": 1,
+        "timeout_keep_alive": 300,
+    },
+    "llm": {
+        "provider": "openai/gpt-4o-mini",  # load_config() reads LLM_PROVIDER; presumably overrides this — verify
+    },
+    "redis": {
+        "host": "localhost",
+        "port": 6379,
+        "db": 0,
+        "password": "",
+        "task_ttl_seconds": 3600,  # 1 hour
+        "ssl": False,
+    },
+    "rate_limiting": {
+        "enabled": True,
+        "default_limit": "1000/minute",
+        "trusted_proxies": [],
+        "storage_uri": "memory://",
+    },
+    "security": {
+        "enabled": False,
+        "jwt_enabled": False,
+        "api_token": "",
+        "https_redirect": False,
+        "trusted_hosts": ["*"],  # NOTE(review): wildcard — presumably matches any host; confirm
+        "headers": {
+            "x_content_type_options": "nosniff",
+            "x_frame_options": "DENY",
+            "content_security_policy": "default-src 'self'",
+            "strict_transport_security": "max-age=63072000; includeSubDomains",  # 63072000 s = 730 days
+        },
+    },
+    "crawler": {
+        "base_config": {"simulate_user": True},
+        "memory_threshold_percent": 95.0,
+        "rate_limiter": {"enabled": True, "base_delay": [1.0, 2.0]},
+        "timeouts": {"stream_init": 30.0, "batch_process": 300.0},
+        "pool": {"max_pages": 40, "idle_ttl_sec": 300},
+        "browser": {
+            "kwargs": {"headless": True, "text_mode": True},
+            "extra_args": [  # NOTE(review): some flags relax browser security (sandbox, web security, certs) — confirm as defaults
+                "--no-sandbox",
+                "--disable-dev-shm-usage",
+                "--disable-gpu",
+                "--disable-software-rasterizer",
+                "--disable-web-security",
+                "--allow-insecure-localhost",
+                "--ignore-certificate-errors",
+            ],
+        },
+    },
+    "logging": {
+        "level": "INFO",
+        "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+    },
+    "observability": {
+        "prometheus": {"enabled": True, "endpoint": "/metrics"},
+        "health_check": {"endpoint": "/health"},
+    },
+    "webhooks": {
+        "enabled": True,
+        "default_url": None,
+        "data_in_payload": False,
+        "retry": {
+            "max_attempts": 5,
+            "initial_delay_ms": 1000,  # 1 s
+            "max_delay_ms": 32000,  # 32 s
+            "timeout_ms": 30000,  # 30 s
+        },
+        "headers": {"User-Agent": "Crawl4AI-Webhook/1.0"},
+    },
+}
+
+
+def _deep_merge(base: dict, override: dict) -> dict:
+    """Return a deep copy of base with override merged in recursively; override wins."""
+    import copy  # function-scope import: the module's import block is outside this hunk
+    merged = copy.deepcopy(base)  # never alias base's nested dicts/lists into the result
+    for key, value in override.items():
+        nested = isinstance(merged.get(key), dict) and isinstance(value, dict)
+        # Merge dict-into-dict key by key; any other value replaces the base entry wholesale.
+        merged[key] = _deep_merge(merged[key], value) if nested else copy.deepcopy(value)
+    return merged
+
+
 def load_config() -> Dict:
     """Load and return application configuration with environment variable overrides."""
     config_path = Path(__file__).parent / "config.yml"
     with open(config_path, "r") as config_file:
-        config = yaml.safe_load(config_file)
+        user_config = yaml.safe_load(config_file) or {}
+
+    # Deep-merge user config on top of defaults so missing keys get safe values
+    config = _deep_merge(DEFAULT_CONFIG, user_config)
+
+    for section in DEFAULT_CONFIG:
+        if section not in user_config:
+            logging.warning(
+                f"Config section '{section}' missing from config.yml, using defaults"
+            )
     
     # Override LLM provider from environment if set
     llm_provider = os.environ.get("LLM_PROVIDER")
diff --git a/docs/examples/adaptive_crawling/export_import_kb.py b/docs/examples/adaptive_crawling/export_import_kb.py
index c0a72c2c..476eb700 100644
--- a/docs/examples/adaptive_crawling/export_import_kb.py
+++ b/docs/examples/adaptive_crawling/export_import_kb.py
@@ -114,7 +114,7 @@ async def import_and_continue():
         
         # Import existing knowledge base
         print(f"\n1. Importing knowledge base from {kb_path}")
-        adaptive.import_knowledge_base(kb_path)
+        await adaptive.import_knowledge_base(kb_path)
         
         print(f"   - Imported {len(adaptive.state.knowledge_base)} documents")
         print(f"   - Existing URLs: {len(adaptive.state.crawled_urls)}")
@@ -175,10 +175,10 @@ async def share_knowledge_bases():
         merged_crawler = AdaptiveCrawler(crawler)
         
         # Import both knowledge bases
-        merged_crawler.import_knowledge_base(project_a_kb)
+        await merged_crawler.import_knowledge_base(project_a_kb)
         initial_size = len(merged_crawler.state.knowledge_base)
-        
-        merged_crawler.import_knowledge_base(project_b_kb)
+
+        await merged_crawler.import_knowledge_base(project_b_kb)
         final_size = len(merged_crawler.state.knowledge_base)
         
         print(f"   - Project A documents: {initial_size}")
diff --git a/docs/md_v2/api/adaptive-crawler.md b/docs/md_v2/api/adaptive-crawler.md
index af92ee3a..5bd5bf44 100644
--- a/docs/md_v2/api/adaptive-crawler.md
+++ b/docs/md_v2/api/adaptive-crawler.md
@@ -161,7 +161,7 @@ adaptive.export_knowledge_base("my_knowledge.jsonl")
 Import a previously exported knowledge base.
 
 ```python
-def import_knowledge_base(
+async def import_knowledge_base(
     self,
     path: Union[str, Path]
 ) -> None
diff --git a/docs/md_v2/complete-sdk-reference.md b/docs/md_v2/complete-sdk-reference.md
index 6fd974ea..aa0517b2 100644
--- a/docs/md_v2/complete-sdk-reference.md
+++ b/docs/md_v2/complete-sdk-reference.md
@@ -4128,7 +4128,7 @@ That's how you keep the config self-contained, illustrate **XPath** usage, and d
 ## 3. Advanced Schema & Nested Structures
 ### Sample E-Commerce HTML
 ```
-https://gist.githubusercontent.com/githubusercontent/2d7b8ba3cd8ab6cf3c8da771ddb36878/raw/1ae2f90c6861ce7dd84cc50d3df9920dee5e1fd2/sample_ecommerce.html
+https://raw.githubusercontent.com/unclecode/crawl4ai/main/docs/examples/sample_ecommerce.html
 ```
 ```python
 schema = {
@@ -4253,7 +4253,7 @@ async def extract_ecommerce_data():
 
     async with AsyncWebCrawler(verbose=True) as crawler:
         result = await crawler.arun(
-            url="https://gist.githubusercontent.com/githubusercontent/2d7b8ba3cd8ab6cf3c8da771ddb36878/raw/1ae2f90c6861ce7dd84cc50d3df9920dee5e1fd2/sample_ecommerce.html",
+            url="https://raw.githubusercontent.com/unclecode/crawl4ai/main/docs/examples/sample_ecommerce.html",
             extraction_strategy=strategy,
             config=config
         )
diff --git a/docs/md_v2/core/adaptive-crawling.md b/docs/md_v2/core/adaptive-crawling.md
index 1a43c9f2..b3cf2672 100644
--- a/docs/md_v2/core/adaptive-crawling.md
+++ b/docs/md_v2/core/adaptive-crawling.md
@@ -274,7 +274,7 @@ adaptive.export_knowledge_base("knowledge_base.jsonl")
 
 # Import into another session
 new_adaptive = AdaptiveCrawler(crawler)
-new_adaptive.import_knowledge_base("knowledge_base.jsonl")
+await new_adaptive.import_knowledge_base("knowledge_base.jsonl")
 ```
 
 ## Best Practices
diff --git a/docs/md_v2/extraction/no-llm-strategies.md b/docs/md_v2/extraction/no-llm-strategies.md
index 318b5106..63138b30 100644
--- a/docs/md_v2/extraction/no-llm-strategies.md
+++ b/docs/md_v2/extraction/no-llm-strategies.md
@@ -191,7 +191,7 @@ Real sites often have **nested** or repeated data—like categories containing p
 
 We have a **sample e-commerce** HTML file on GitHub (example):
 ```
-https://gist.githubusercontent.com/githubusercontent/2d7b8ba3cd8ab6cf3c8da771ddb36878/raw/1ae2f90c6861ce7dd84cc50d3df9920dee5e1fd2/sample_ecommerce.html
+https://raw.githubusercontent.com/unclecode/crawl4ai/main/docs/examples/sample_ecommerce.html
 ```
 This snippet includes categories, products, features, reviews, and related items. Let's see how to define a schema that fully captures that structure **without LLM**.
 
@@ -323,7 +323,7 @@ async def extract_ecommerce_data():
     
     async with AsyncWebCrawler(verbose=True) as crawler:
         result = await crawler.arun(
-            url="https://gist.githubusercontent.com/githubusercontent/2d7b8ba3cd8ab6cf3c8da771ddb36878/raw/1ae2f90c6861ce7dd84cc50d3df9920dee5e1fd2/sample_ecommerce.html",
+            url="https://raw.githubusercontent.com/unclecode/crawl4ai/main/docs/examples/sample_ecommerce.html",
             extraction_strategy=strategy,
             config=config
         )
diff --git a/docs/releases_review/crawl4ai_v0_7_0_showcase.py b/docs/releases_review/crawl4ai_v0_7_0_showcase.py
index 29c056f0..d78af7f8 100644
--- a/docs/releases_review/crawl4ai_v0_7_0_showcase.py
+++ b/docs/releases_review/crawl4ai_v0_7_0_showcase.py
@@ -543,7 +543,7 @@ async def adaptive_crawling_demo(auto_mode=False):
         adaptive2 = AdaptiveCrawler(crawler, export_config)
         
         # Import the knowledge base
-        adaptive2.import_knowledge_base(kb_export)
+        await adaptive2.import_knowledge_base(kb_export)
         console.print(f"✓ Imported {len(adaptive2.state.knowledge_base)} documents")
         console.print(f"✓ Starting confidence: {int(adaptive2.confidence * 100)}%")
         
diff --git a/tests/adaptive/test_embedding_strategy.py b/tests/adaptive/test_embedding_strategy.py
index 37433065..6e34b85e 100644
--- a/tests/adaptive/test_embedding_strategy.py
+++ b/tests/adaptive/test_embedding_strategy.py
@@ -233,7 +233,7 @@ async def test_knowledge_export_import():
         crawler2 = AdaptiveCrawler(crawler=crawler, config=config)
         
         console.print("\n[cyan]Importing knowledge base...[/cyan]")
-        crawler2.import_knowledge_base(export_path)
+        await crawler2.import_knowledge_base(export_path)
         
         # Continue with new query - should be faster
         console.print("\n[cyan]Extending with new query...[/cyan]")