feat: add avoid_ads/avoid_css resource filtering and pool release lifecycle

Add opt-in BrowserConfig flags (avoid_ads, avoid_css) for blocking ad/tracker domains and CSS resources at the browser context level. Refactor crawler pool with release_crawler() and active_requests tracking to prevent janitor from closing browsers with in-flight requests. Add proper finally blocks to all Docker API/server handlers. Update docs for new config options. Inspired by #1689.
2026-06-10 15:58:15 +00:00 · 2026-02-25 05:56:29 +00:00
parent 8d35d17d01
commit c0912f7234
11 changed files with 595 additions and 106 deletions
--- a/crawl4ai/async_configs.py
+++ b/crawl4ai/async_configs.py
@@ -579,6 +579,11 @@ class BrowserConfig:
                                        process to reclaim leaked memory. 0 = disabled.
                                        Recommended: 500-1000 for long-running crawlers.
                                        Default: 0.
+        avoid_ads (bool): If True, blocks ad-related and tracker network requests at the
+                          browser context level using a curated blocklist of top ad/tracker
+                          domains. Default: False.
+        avoid_css (bool): If True, aborts requests whose URL ends in a stylesheet extension
+                          (.css, .less, .scss, .sass) to reduce resource usage. Default: False.
    """

    def __init__(
@@ -627,6 +632,8 @@ class BrowserConfig:
        debugging_port: int = 9222,
        host: str = "localhost",
        enable_stealth: bool = False,
+        avoid_ads: bool = False,
+        avoid_css: bool = False,
        init_scripts: List[str] = None,
        memory_saving_mode: bool = False,
        max_pages_before_recycle: int = 0,
@@ -692,6 +699,8 @@ class BrowserConfig:
        self.debugging_port = debugging_port
        self.host = host
        self.enable_stealth = enable_stealth
+        self.avoid_ads = avoid_ads
+        self.avoid_css = avoid_css
        self.init_scripts = init_scripts if init_scripts is not None else []
        self.memory_saving_mode = memory_saving_mode
        self.max_pages_before_recycle = max_pages_before_recycle
@@ -785,6 +794,8 @@ class BrowserConfig:
            "debugging_port": self.debugging_port,
            "host": self.host,
            "enable_stealth": self.enable_stealth,
+            "avoid_ads": self.avoid_ads,
+            "avoid_css": self.avoid_css,
            "init_scripts": self.init_scripts,
            "memory_saving_mode": self.memory_saving_mode,
            "max_pages_before_recycle": self.max_pages_before_recycle,
--- a/crawl4ai/browser_manager.py
+++ b/crawl4ai/browser_manager.py
@@ -1258,59 +1258,47 @@ class BrowserManager:
        }
        proxy_settings = {"server": self.config.proxy} if self.config.proxy else None

-        blocked_extensions = [
+        # CSS extensions (blocked separately via avoid_css flag)
+        css_extensions = ["css", "less", "scss", "sass"]
+
+        # Static resource extensions (blocked when text_mode is enabled)
+        static_extensions = [
            # Images
-            "jpg",
-            "jpeg",
-            "png",
-            "gif",
-            "webp",
-            "svg",
-            "ico",
-            "bmp",
-            "tiff",
-            "psd",
+            "jpg", "jpeg", "png", "gif", "webp", "svg", "ico", "bmp", "tiff", "psd",
            # Fonts
-            "woff",
-            "woff2",
-            "ttf",
-            "otf",
-            "eot",
-            # Styles
-            # 'css', 'less', 'scss', 'sass',
+            "woff", "woff2", "ttf", "otf", "eot",
            # Media
-            "mp4",
-            "webm",
-            "ogg",
-            "avi",
-            "mov",
-            "wmv",
-            "flv",
-            "m4v",
-            "mp3",
-            "wav",
-            "aac",
-            "m4a",
-            "opus",
-            "flac",
+            "mp4", "webm", "ogg", "avi", "mov", "wmv", "flv", "m4v",
+            "mp3", "wav", "aac", "m4a", "opus", "flac",
            # Documents
-            "pdf",
-            "doc",
-            "docx",
-            "xls",
-            "xlsx",
-            "ppt",
-            "pptx",
+            "pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx",
            # Archives
-            "zip",
-            "rar",
-            "7z",
-            "tar",
-            "gz",
+            "zip", "rar", "7z", "tar", "gz",
            # Scripts and data
-            "xml",
-            "swf",
-            "wasm",
+            "xml", "swf", "wasm",
+        ]
+
+        # Ad and tracker domains (curated from uBlock/EasyList sources).
+        # A glob such as "**/example.com/**" only matches when the host is exactly
+        # "example.com", which misses the subdomains trackers are actually served
+        # from (www.google-analytics.com, connect.facebook.net, static.hotjar.com).
+        # Each domain is therefore compiled to a regex that matches the domain
+        # itself and any of its subdomains; context.route() accepts compiled
+        # patterns as well as glob strings.
+        import re  # local import so this block does not depend on module-level imports
+
+        ad_tracker_domains = [
+            "google-analytics.com",
+            "googletagmanager.com",
+            "googlesyndication.com",
+            "doubleclick.net",
+            "adservice.google.com",
+            "adsystem.com",
+            "adzerk.net",
+            "adnxs.com",
+            "ads.linkedin.com",
+            "facebook.net",
+            "analytics.twitter.com",
+            "ads-twitter.com",
+            "hotjar.com",
+            "clarity.ms",
+            "scorecardresearch.com",
+            "pixel.wp.com",
+            "amazon-adsystem.com",
+            "mixpanel.com",
+            "segment.com",
+        ]
+        ad_tracker_patterns = [
+            re.compile(
+                # scheme, optional subdomain labels, the domain, optional port,
+                # then a URL delimiter or end of string so that a lookalike host
+                # such as "hotjar.com.evil.test" does not match.
+                r"^(?:https?|wss?)://(?:[^/?#]*\.)?"
+                + domain.replace(".", r"\.")
+                + r"(?::\d+)?(?:[/?#]|$)",
+                re.IGNORECASE,
+            )
+            for domain in ad_tracker_domains
+        ]

        # Common context settings
@@ -1364,11 +1352,21 @@ class BrowserManager:
        # Create and return the context with all settings
        context = await self.browser.new_context(**context_settings)

-        # Apply text mode settings if enabled
+        # Build dynamic blocking list based on config flags
+        to_block = []
+        if self.config.avoid_css:
+            to_block.extend(css_extensions)
        if self.config.text_mode:
-            # Create and apply route patterns for each extension
-            for ext in blocked_extensions:
+            to_block.extend(static_extensions)
+
+        if to_block:
+            for ext in to_block:
                await context.route(f"**/*.{ext}", lambda route: route.abort())
+
+        if self.config.avoid_ads:
+            for pattern in ad_tracker_patterns:
+                await context.route(pattern, lambda route: route.abort())
+
        return context

    def _make_config_signature(self, crawlerRunConfig: CrawlerRunConfig) -> str:
--- a/deploy/docker/api.py
+++ b/deploy/docker/api.py
@@ -67,7 +67,8 @@ async def handle_llm_qa(
    config: dict
 ) -> str:
    """Process QA using LLM with crawled content as context."""
-    from crawler_pool import get_crawler
+    from crawler_pool import get_crawler, release_crawler
+    crawler = None
    try:
        if not url.startswith(('http://', 'https://')) and not url.startswith(("raw:", "raw://")):
            url = 'https://' + url
@@ -121,6 +122,9 @@ async def handle_llm_qa(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=str(e)
        )
+    finally:
+        if crawler:
+            await release_crawler(crawler)

 async def process_llm_extraction(
    redis: aioredis.Redis,
@@ -249,6 +253,7 @@ async def handle_markdown_request(
    base_url: Optional[str] = None
 ) -> str:
    """Handle markdown generation requests."""
+    crawler = None
    try:
        # Validate provider if using LLM filter
        if filter_type == FilterType.LLM:
@@ -282,7 +287,7 @@ async def handle_markdown_request(

        cache_mode = CacheMode.ENABLED if cache == "1" else CacheMode.WRITE_ONLY

-        from crawler_pool import get_crawler
+        from crawler_pool import get_crawler, release_crawler
        from utils import load_config as _load_config
        _cfg = _load_config()
        browser_cfg = BrowserConfig(
@@ -315,6 +320,9 @@ async def handle_markdown_request(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=str(e)
        )
+    finally:
+        if crawler:
+            await release_crawler(crawler)

 async def handle_llm_request(
    redis: aioredis.Redis,
@@ -481,6 +489,7 @@ async def stream_results(crawler: AsyncWebCrawler, results_gen: AsyncGenerator)
    """Stream results with heartbeats and completion markers."""
    import json
    from utils import datetime_handler
+    from crawler_pool import release_crawler

    try:
        async for result in results_gen:
@@ -507,11 +516,8 @@ async def stream_results(crawler: AsyncWebCrawler, results_gen: AsyncGenerator)
    except asyncio.CancelledError:
        logger.warning("Client disconnected during streaming")
    finally:
-        # try:
-        #     await crawler.close()
-        # except Exception as e:
-        #     logger.error(f"Crawler cleanup error: {e}")
-        pass
+        if crawler:
+            await release_crawler(crawler)

 async def handle_crawl_request(
    urls: List[str],
@@ -523,6 +529,7 @@ async def handle_crawl_request(
    """Handle non-streaming crawl requests with optional hooks."""
    # Track request start
    request_id = f"req_{uuid4().hex[:8]}"
+    crawler = None
    try:
        from monitor import get_monitor
        await get_monitor().track_request_start(
@@ -549,11 +556,8 @@ async def handle_crawl_request(
            ) if config["crawler"]["rate_limiter"]["enabled"] else None
        )
        
-        from crawler_pool import get_crawler
+        from crawler_pool import get_crawler, release_crawler
        crawler = await get_crawler(browser_config)
-
-        # crawler: AsyncWebCrawler = AsyncWebCrawler(config=browser_config)
-        # await crawler.start()
        
        # Attach hooks if provided
        hooks_status = {}
@@ -589,8 +593,6 @@ async def handle_crawl_request(
        if not isinstance(results, list):
            results = [results]

-        # await crawler.close()
-        
        end_mem_mb = _get_memory_mb() # <--- Get memory after
        end_time = time.time()
        
@@ -689,13 +691,6 @@ async def handle_crawl_request(
        except:
            pass

-        if 'crawler' in locals() and crawler.ready: # Check if crawler was initialized and started
-            #  try:
-            #      await crawler.close()
-            #  except Exception as close_e:
-            #       logger.error(f"Error closing crawler during exception handling: {close_e}")
-            logger.error(f"Error closing crawler during exception handling: {str(e)}")
-
        # Measure memory even on error if possible
        end_mem_mb_error = _get_memory_mb()
        if start_mem_mb is not None and end_mem_mb_error is not None:
@@ -709,6 +704,9 @@ async def handle_crawl_request(
                "server_peak_memory_mb": max(peak_mem_mb if peak_mem_mb else 0, end_mem_mb_error or 0)
            })
        )
+    finally:
+        if crawler:
+            await release_crawler(crawler)

 async def handle_stream_crawl_request(
    urls: List[str],
@@ -719,6 +717,7 @@ async def handle_stream_crawl_request(
 ) -> Tuple[AsyncWebCrawler, AsyncGenerator, Optional[Dict]]:
    """Handle streaming crawl requests with optional hooks."""
    hooks_info = None
+    crawler = None
    try:
        browser_config = BrowserConfig.load(browser_config)
        # browser_config.verbose = True # Set to False or remove for production stress testing
@@ -734,11 +733,8 @@ async def handle_stream_crawl_request(
            )
        )

-        from crawler_pool import get_crawler
+        from crawler_pool import get_crawler, release_crawler
        crawler = await get_crawler(browser_config)
-
-        # crawler = AsyncWebCrawler(config=browser_config)
-        # await crawler.start()
        
        # Attach hooks if provided
        if hooks_config:
@@ -763,13 +759,10 @@ async def handle_stream_crawl_request(
        return crawler, results_gen, hooks_info

    except Exception as e:
-        # Make sure to close crawler if started during an error here
-        if 'crawler' in locals() and crawler.ready:
-            #  try:
-            #       await crawler.close()
-            #  except Exception as close_e:
-            #       logger.error(f"Error closing crawler during stream setup exception: {close_e}")
-            logger.error(f"Error closing crawler during stream setup exception: {str(e)}")
+        # Release crawler on setup error (for successful streams,
+        # release happens in stream_results finally block)
+        if crawler:
+            await release_crawler(crawler)
        logger.error(f"Stream crawl error: {str(e)}", exc_info=True)
        # Raising HTTPException here will prevent streaming response
        raise HTTPException(
--- a/deploy/docker/crawler_pool.py
+++ b/deploy/docker/crawler_pool.py
@@ -39,6 +39,9 @@ async def get_crawler(cfg: BrowserConfig) -> AsyncWebCrawler:
        if PERMANENT and _is_default_config(sig):
            LAST_USED[sig] = time.time()
            USAGE_COUNT[sig] = USAGE_COUNT.get(sig, 0) + 1
+            if not hasattr(PERMANENT, 'active_requests'):
+                PERMANENT.active_requests = 0
+            PERMANENT.active_requests += 1
            logger.info("🔥 Using permanent browser")
            return PERMANENT

@@ -46,13 +49,21 @@ async def get_crawler(cfg: BrowserConfig) -> AsyncWebCrawler:
        if sig in HOT_POOL:
            LAST_USED[sig] = time.time()
            USAGE_COUNT[sig] = USAGE_COUNT.get(sig, 0) + 1
-            logger.info(f"♨️  Using hot pool browser (sig={sig[:8]})")
-            return HOT_POOL[sig]
+            crawler = HOT_POOL[sig]
+            if not hasattr(crawler, 'active_requests'):
+                crawler.active_requests = 0
+            crawler.active_requests += 1
+            logger.info(f"♨️  Using hot pool browser (sig={sig[:8]}, active={crawler.active_requests})")
+            return crawler

        # Check cold pool (promote to hot if used 3+ times)
        if sig in COLD_POOL:
            LAST_USED[sig] = time.time()
            USAGE_COUNT[sig] = USAGE_COUNT.get(sig, 0) + 1
+            crawler = COLD_POOL[sig]
+            if not hasattr(crawler, 'active_requests'):
+                crawler.active_requests = 0
+            crawler.active_requests += 1

            if USAGE_COUNT[sig] >= 3:
                logger.info(f"⬆️  Promoting to hot pool (sig={sig[:8]}, count={USAGE_COUNT[sig]})")
@@ -68,7 +79,7 @@ async def get_crawler(cfg: BrowserConfig) -> AsyncWebCrawler:
                return HOT_POOL[sig]

            logger.info(f"❄️  Using cold pool browser (sig={sig[:8]})")
-            return COLD_POOL[sig]
+            return crawler

        # Memory check before creating new
        mem_pct = get_container_memory_percent()
@@ -80,11 +91,23 @@ async def get_crawler(cfg: BrowserConfig) -> AsyncWebCrawler:
        logger.info(f"🆕 Creating new browser in cold pool (sig={sig[:8]}, mem={mem_pct:.1f}%)")
        crawler = AsyncWebCrawler(config=cfg, thread_safe=False)
        await crawler.start()
+        crawler.active_requests = 1
        COLD_POOL[sig] = crawler
        LAST_USED[sig] = time.time()
        USAGE_COUNT[sig] = 1
        return crawler

+async def release_crawler(crawler: AsyncWebCrawler):
+    """Decrement the active request count of a pooled crawler.
+
+    Call this in a finally block after finishing work with a crawler
+    obtained via get_crawler() so the janitor knows when it's safe
+    to close idle browsers.
+
+    Args:
+        crawler: The crawler to release. Objects without an
+            ``active_requests`` attribute are left untouched.
+    """
+    # Take the module-level LOCK so the read-modify-write of the counter is
+    # not interleaved with other coroutines that hold the same lock.
+    async with LOCK:
+        if hasattr(crawler, 'active_requests'):
+            # Clamp at zero: an extra release must not drive the counter negative.
+            crawler.active_requests = max(0, crawler.active_requests - 1)
+
 async def init_permanent(cfg: BrowserConfig):
    """Initialize permanent default browser."""
    global PERMANENT, DEFAULT_CONFIG_SIG
@@ -132,10 +155,13 @@ async def janitor():
            # Clean cold pool
            for sig in list(COLD_POOL.keys()):
                if now - LAST_USED.get(sig, now) > cold_ttl:
+                    crawler = COLD_POOL[sig]
+                    if getattr(crawler, 'active_requests', 0) > 0:
+                        continue  # still serving requests, skip
                    idle_time = now - LAST_USED[sig]
                    logger.info(f"🧹 Closing cold browser (sig={sig[:8]}, idle={idle_time:.0f}s)")
                    with suppress(Exception):
-                        await COLD_POOL[sig].close()
+                        await crawler.close()
                    COLD_POOL.pop(sig, None)
                    LAST_USED.pop(sig, None)
                    USAGE_COUNT.pop(sig, None)
@@ -150,10 +176,13 @@ async def janitor():
            # Clean hot pool (more conservative)
            for sig in list(HOT_POOL.keys()):
                if now - LAST_USED.get(sig, now) > hot_ttl:
+                    crawler = HOT_POOL[sig]
+                    if getattr(crawler, 'active_requests', 0) > 0:
+                        continue  # still serving requests, skip
                    idle_time = now - LAST_USED[sig]
                    logger.info(f"🧹 Closing hot browser (sig={sig[:8]}, idle={idle_time:.0f}s)")
                    with suppress(Exception):
-                        await HOT_POOL[sig].close()
+                        await crawler.close()
                    HOT_POOL.pop(sig, None)
                    LAST_USED.pop(sig, None)
                    USAGE_COUNT.pop(sig, None)
--- a/deploy/docker/server.py
+++ b/deploy/docker/server.py
@@ -7,7 +7,7 @@ Crawl4AI FastAPI entry‑point
 """

 # ── stdlib & 3rd‑party imports ───────────────────────────────
-from crawler_pool import get_crawler, close_all, janitor
+from crawler_pool import get_crawler, release_crawler, close_all, janitor
 from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
 from crawl4ai.__version__ import __version__
 from auth import create_access_token, get_token_dependency, TokenRequest
@@ -367,8 +367,8 @@ async def generate_html(
    Use when you need sanitized HTML structures for building schemas or further processing.
    """
    validate_url_scheme(body.url, allow_raw=True)
-    from crawler_pool import get_crawler
    cfg = CrawlerRunConfig()
+    crawler = None
    try:
        crawler = await get_crawler(get_default_browser_config())
        results = await crawler.arun(url=body.url, config=cfg)
@@ -381,6 +381,9 @@ async def generate_html(
        return JSONResponse({"html": processed_html, "url": body.url, "success": True})
    except Exception as e:
        raise HTTPException(500, detail=str(e))
+    finally:
+        if crawler:
+            await release_crawler(crawler)

 # Screenshot endpoint

@@ -399,7 +402,7 @@ async def generate_screenshot(
    Then in result instead of the screenshot you will get a path to the saved file.
    """
    validate_url_scheme(body.url)
-    from crawler_pool import get_crawler
+    crawler = None
    try:
        cfg = CrawlerRunConfig(screenshot=True, screenshot_wait_for=body.screenshot_wait_for)
        crawler = await get_crawler(get_default_browser_config())
@@ -416,6 +419,9 @@ async def generate_screenshot(
        return {"success": True, "screenshot": screenshot_data}
    except Exception as e:
        raise HTTPException(500, detail=str(e))
+    finally:
+        if crawler:
+            await release_crawler(crawler)

 # PDF endpoint

@@ -434,7 +440,7 @@ async def generate_pdf(
    Then in result instead of the PDF you will get a path to the saved file.
    """
    validate_url_scheme(body.url)
-    from crawler_pool import get_crawler
+    crawler = None
    try:
        cfg = CrawlerRunConfig(pdf=True)
        crawler = await get_crawler(get_default_browser_config())
@@ -451,6 +457,9 @@ async def generate_pdf(
        return {"success": True, "pdf": base64.b64encode(pdf_data).decode()}
    except Exception as e:
        raise HTTPException(500, detail=str(e))
+    finally:
+        if crawler:
+            await release_crawler(crawler)


@app.post("/execute_js")
@@ -507,7 +516,7 @@ async def execute_js(

    """
    validate_url_scheme(body.url)
-    from crawler_pool import get_crawler
+    crawler = None
    try:
        cfg = CrawlerRunConfig(js_code=body.scripts)
        crawler = await get_crawler(get_default_browser_config())
@@ -518,6 +527,9 @@ async def execute_js(
        return JSONResponse(data)
    except Exception as e:
        raise HTTPException(500, detail=str(e))
+    finally:
+        if crawler:
+            await release_crawler(crawler)


@app.get("/llm/{url:path}")
--- a/docs/md_v2/api/parameters.md
+++ b/docs/md_v2/api/parameters.md
@@ -49,6 +49,8 @@ browser_cfg = BrowserConfig(
 | **`user_agent_generator_config`** | `dict` (default: `{}`)     | Configuration dict for user agent generation when `user_agent_mode="random"`.                                                         |
 | **`text_mode`**       | `bool` (default: `False`)              | If `True`, tries to disable images/other heavy content for speed.                                                                     |
 | **`light_mode`**      | `bool` (default: `False`)              | Disables some background features for performance gains.                                                                              |
+| **`avoid_ads`**       | `bool` (default: `False`)              | If `True`, blocks requests to common ad/tracker domains (Google Analytics, DoubleClick, Facebook, Hotjar, etc.) at the browser context level. |
+| **`avoid_css`**       | `bool` (default: `False`)              | If `True`, blocks loading of CSS files (`.css`, `.less`, `.scss`, `.sass`) for faster, leaner crawls when only text content is needed. |
 | **`extra_args`**      | `list` (default: `[]`)                 | Additional flags for the underlying browser process, e.g. `["--disable-extensions"]`.                                                |
 | **`enable_stealth`**  | `bool` (default: `False`)              | Enable playwright-stealth mode to bypass bot detection. Cannot be used with `browser_mode="builtin"`.                                |

--- a/docs/md_v2/complete-sdk-reference.md
+++ b/docs/md_v2/complete-sdk-reference.md
@@ -1402,6 +1402,8 @@ class BrowserConfig:
        user_agent=None,
        text_mode=False,
        light_mode=False,
+        avoid_ads=False,
+        avoid_css=False,
        extra_args=None,
        enable_stealth=False,
        # ... other advanced parameters omitted here
@@ -1440,15 +1442,19 @@ class BrowserConfig:
 8. **`user_agent`**:  
   - Custom User-Agent string. If `None`, a default is used.  
   - You can also set `user_agent_mode="random"` for randomization (if you want to fight bot detection).
-9. **`text_mode`** & **`light_mode`**:  
-   - `text_mode=True` disables images, possibly speeding up text-only crawls.  
-   - `light_mode=True` turns off certain background features for performance.  
-10. **`extra_args`**:  
-    - Additional flags for the underlying browser.  
+9. **`text_mode`** & **`light_mode`**:
+   - `text_mode=True` disables images, possibly speeding up text-only crawls.
+   - `light_mode=True` turns off certain background features for performance.
+10. **`avoid_ads`** & **`avoid_css`**:
+    - `avoid_ads=True` blocks requests to common ad and tracker domains (Google Analytics, DoubleClick, Facebook, Hotjar, etc.) at the browser context level. Reduces network overhead and memory usage.
+    - `avoid_css=True` blocks loading of CSS files (`.css`, `.less`, `.scss`, `.sass`), useful when you only need text content and want faster, leaner crawls.
+    - Both default to `False` (opt-in). Can be combined with each other and with `text_mode`.
+11. **`extra_args`**:
+    - Additional flags for the underlying browser.
    - E.g. `["--disable-extensions"]`.
-11. **`enable_stealth`**:  
-    - If `True`, enables stealth mode using playwright-stealth.  
-    - Modifies browser fingerprints to avoid basic bot detection.  
+12. **`enable_stealth`**:
+    - If `True`, enables stealth mode using playwright-stealth.
+    - Modifies browser fingerprints to avoid basic bot detection.
    - Default is `False`. Recommended for sites with bot protection.
 ### Helper Methods
 Both configuration classes provide a `clone()` method to create modified copies:
--- a/docs/md_v2/core/browser-crawler-config.md
+++ b/docs/md_v2/core/browser-crawler-config.md
@@ -109,17 +109,22 @@ class BrowserConfig:
    - `user_agent`: Custom User-Agent string. If `None`, a default is used.  
    - `user_agent_mode`: Set to `"random"` for randomization (helps fight bot detection).

-12.⠀**`text_mode`** & **`light_mode`**  
-    - `text_mode=True` disables images, possibly speeding up text-only crawls.  
-    - `light_mode=True` turns off certain background features for performance.  
+12.⠀**`text_mode`** & **`light_mode`**
+    - `text_mode=True` disables images, possibly speeding up text-only crawls.
+    - `light_mode=True` turns off certain background features for performance.

-13.⠀**`extra_args`**  
+13.⠀**`avoid_ads`** & **`avoid_css`**
+    - `avoid_ads=True` blocks requests to common ad and tracker domains (Google Analytics, DoubleClick, Facebook, Hotjar, etc.) at the browser context level. Reduces network overhead and memory usage.
+    - `avoid_css=True` blocks loading of CSS files (`.css`, `.less`, `.scss`, `.sass`), useful when you only need text content and want faster, leaner crawls.
+    - Both default to `False` (opt-in). Can be combined with each other and with `text_mode`.
+
+14.⠀**`extra_args`**  
    - Additional flags for the underlying browser.  
    - E.g. `["--disable-extensions"]`.

-14.⠀**`enable_stealth`**  
-    - If `True`, enables stealth mode using playwright-stealth.  
-    - Modifies browser fingerprints to avoid basic bot detection.  
+15.⠀**`enable_stealth`**
+    - If `True`, enables stealth mode using playwright-stealth.
+    - Modifies browser fingerprints to avoid basic bot detection.
    - Default is `False`. Recommended for sites with bot protection.

 ### Helper Methods
--- a/tests/browser/test_resource_filtering.py
+++ b/tests/browser/test_resource_filtering.py
@@ -0,0 +1,178 @@
+"""E2E tests for avoid_ads / avoid_css resource filtering.
+
+These tests launch real browsers and crawl real websites to verify
+that route-based resource blocking actually works.
+
+Domains used:
+  - books.toscrape.com  (CSS-heavy practice site, designed for scraping)
+  - quotes.toscrape.com (simple practice site)
+  - httpbin.org/html    (static HTML, no trackers)
+  - en.wikipedia.org    (real site with analytics)
+"""
+
+import pytest
+from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
+
+
+# ---------------------------------------------------------------------------
+# Basic success tests — flags should not break crawling
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_crawl_with_avoid_css_succeeds():
+    """Crawl books.toscrape.com with avoid_css=True — page should load fine."""
+    # Smoke test: only checks that the crawl succeeds and returns a non-trivial
+    # amount of HTML; it does not inspect network traffic.
+    browser_config = BrowserConfig(headless=True, avoid_css=True)
+    async with AsyncWebCrawler(config=browser_config) as crawler:
+        result = await crawler.arun(
+            url="https://books.toscrape.com",
+            config=CrawlerRunConfig(cache_mode="bypass"),
+        )
+        assert result.success, f"Crawl failed: {result.error_message}"
+        assert len(result.html) > 500, "Page HTML is suspiciously short"
+
+
+@pytest.mark.asyncio
+async def test_crawl_with_avoid_ads_succeeds():
+    """Crawl Wikipedia with avoid_ads=True — content should be intact."""
+    # NOTE(review): this verifies that the page still loads with the flag set;
+    # nothing here asserts that any ad/tracker request was actually aborted.
+    browser_config = BrowserConfig(headless=True, avoid_ads=True)
+    async with AsyncWebCrawler(config=browser_config) as crawler:
+        result = await crawler.arun(
+            url="https://en.wikipedia.org/wiki/Web_scraping",
+            config=CrawlerRunConfig(cache_mode="bypass"),
+        )
+        assert result.success, f"Crawl failed: {result.error_message}"
+        # Wikipedia article content must be present
+        html_lower = result.html.lower()
+        assert "web scraping" in html_lower, "Wikipedia content missing"
+
+
+@pytest.mark.asyncio
+async def test_crawl_with_both_flags_succeeds():
+    """Both avoid_css and avoid_ads enabled simultaneously."""
+    browser_config = BrowserConfig(headless=True, avoid_css=True, avoid_ads=True)
+    async with AsyncWebCrawler(config=browser_config) as crawler:
+        result = await crawler.arun(
+            url="https://quotes.toscrape.com",
+            config=CrawlerRunConfig(cache_mode="bypass"),
+        )
+        assert result.success, f"Crawl failed: {result.error_message}"
+        # Loose content check: passes if either marker string appears anywhere
+        # in the lower-cased HTML.
+        html_lower = result.html.lower()
+        assert "quote" in html_lower or "toscrape" in html_lower
+
+
+@pytest.mark.asyncio
+async def test_avoid_ads_does_not_block_page_content():
+    """avoid_ads must not interfere with first-party page content."""
+    # Guards against over-blocking: the crawled URL is on a host that is not in
+    # the ad/tracker list, so the document itself must come through unchanged.
+    browser_config = BrowserConfig(headless=True, avoid_ads=True)
+    async with AsyncWebCrawler(config=browser_config) as crawler:
+        result = await crawler.arun(
+            url="https://httpbin.org/html",
+            config=CrawlerRunConfig(cache_mode="bypass"),
+        )
+        assert result.success, f"Crawl failed: {result.error_message}"
+        # httpbin.org/html serves a Moby Dick excerpt
+        assert "Herman Melville" in result.html, "First-party content missing"
+
+
+# ---------------------------------------------------------------------------
+# Network-level verification — prove routes actually block requests
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.asyncio
+async def test_without_flags_css_loads_normally():
+    """Baseline: without avoid_css, CSS responses should appear in network log."""
+    # Control case for test_avoid_css_blocks_css_requests: same URL and capture
+    # settings, but with the default BrowserConfig flags.
+    browser_config = BrowserConfig(headless=True)
+    async with AsyncWebCrawler(config=browser_config) as crawler:
+        result = await crawler.arun(
+            url="https://books.toscrape.com",
+            config=CrawlerRunConfig(
+                cache_mode="bypass",
+                capture_network_requests=True,
+            ),
+        )
+        assert result.success
+        assert result.network_requests is not None, "Network requests not captured"
+
+        # There should be successful CSS responses
+        # (identified by a ".css" substring anywhere in the captured URL)
+        css_responses = [
+            r
+            for r in result.network_requests
+            if r.get("event_type") == "response" and ".css" in r.get("url", "")
+        ]
+        assert (
+            len(css_responses) > 0
+        ), "CSS should load normally without avoid_css flag"
+
+
+@pytest.mark.asyncio
+async def test_avoid_css_blocks_css_requests():
+    """With avoid_css=True, CSS requests must be aborted (no successful responses)."""
+    # Checks both sides of the block: no "response" event for a CSS URL, and at
+    # least one "request_failed" event for a CSS URL.
+    browser_config = BrowserConfig(headless=True, avoid_css=True)
+    async with AsyncWebCrawler(config=browser_config) as crawler:
+        result = await crawler.arun(
+            url="https://books.toscrape.com",
+            config=CrawlerRunConfig(
+                cache_mode="bypass",
+                capture_network_requests=True,
+            ),
+        )
+        assert result.success
+        assert result.network_requests is not None, "Network requests not captured"
+
+        # No CSS should have gotten a successful response
+        # (CSS URLs are identified by a ".css" substring, not by content type)
+        css_responses = [
+            r
+            for r in result.network_requests
+            if r.get("event_type") == "response" and ".css" in r.get("url", "")
+        ]
+        assert (
+            len(css_responses) == 0
+        ), f"CSS responses should be blocked, but found: {[r['url'] for r in css_responses]}"
+
+        # There SHOULD be request_failed events for CSS (proves blocking happened)
+        css_failures = [
+            r
+            for r in result.network_requests
+            if r.get("event_type") == "request_failed"
+            and ".css" in r.get("url", "")
+        ]
+        assert (
+            len(css_failures) > 0
+        ), "Expected request_failed events for blocked CSS files"
+
+
+@pytest.mark.asyncio
+async def test_avoid_css_with_text_mode_combines():
+    """Both avoid_css and text_mode should combine their blocking rules."""
+    browser_config = BrowserConfig(
+        headless=True, avoid_css=True, text_mode=True
+    )
+    async with AsyncWebCrawler(config=browser_config) as crawler:
+        result = await crawler.arun(
+            url="https://books.toscrape.com",
+            config=CrawlerRunConfig(
+                cache_mode="bypass",
+                capture_network_requests=True,
+            ),
+        )
+        assert result.success
+        assert result.network_requests is not None
+
+        # Only events recorded as "response" count as successfully loaded.
+        successful = [
+            r for r in result.network_requests if r.get("event_type") == "response"
+        ]
+
+        # CSS should be blocked (via avoid_css)
+        css_hits = [r for r in successful if ".css" in r.get("url", "")]
+        assert len(css_hits) == 0, "CSS should be blocked by avoid_css"
+
+        # Images should be blocked (via text_mode)
+        # NOTE(review): the check uses endswith(), so an image URL carrying a
+        # query string or fragment would not be counted here.
+        img_exts = (".jpg", ".jpeg", ".png", ".gif", ".webp")
+        img_hits = [
+            r
+            for r in successful
+            if any(r.get("url", "").lower().endswith(ext) for ext in img_exts)
+        ]
+        assert len(img_hits) == 0, "Images should be blocked by text_mode"
--- a/tests/docker/test_pool_release.py
+++ b/tests/docker/test_pool_release.py
@@ -0,0 +1,155 @@
+"""Tests for crawler pool release_crawler() and active_requests tracking.
+
+These tests validate the pool lifecycle without requiring Docker or a running
+server. They test the release logic directly using mock crawler objects.
+"""
+
+import asyncio
+import pytest
+from unittest.mock import MagicMock
+
+
+# ---------------------------------------------------------------------------
+# Standalone release_crawler implementation for testing
+# (mirrors the logic of release_crawler() in deploy/docker/crawler_pool.py)
+# ---------------------------------------------------------------------------
+
+_TEST_LOCK = asyncio.Lock()  # default lock for _release_crawler; NOTE(review): created at import time, outside any running event loop - confirm this is safe with per-test loops
+
+
+async def _release_crawler(crawler, lock=None):
+    """Standalone release logic matching crawler_pool.release_crawler()."""
+    lock = lock or _TEST_LOCK  # fall back to the module-level lock when the caller passes none
+    async with lock:
+        if hasattr(crawler, "active_requests"):  # objects without the counter are left untouched
+            crawler.active_requests = max(0, crawler.active_requests - 1)  # decrement by one, clamped at 0
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+
+class TestReleaseCrawler:  # exercises the module-local _release_crawler copy, not an imported pool function
+    """Tests for the release_crawler function."""
+
+    @pytest.mark.asyncio
+    async def test_release_decrements_active_requests(self):
+        """release_crawler should decrement active_requests by 1."""
+        crawler = MagicMock()
+        crawler.active_requests = 3  # plain int stored on the mock, so arithmetic on it works
+
+        await _release_crawler(crawler)
+        assert crawler.active_requests == 2
+
+    @pytest.mark.asyncio
+    async def test_release_floors_at_zero(self):
+        """active_requests should never go below 0."""
+        crawler = MagicMock()
+        crawler.active_requests = 0
+
+        await _release_crawler(crawler)
+        assert crawler.active_requests == 0  # max(0, -1) keeps the counter at 0
+
+    @pytest.mark.asyncio
+    async def test_release_from_one_to_zero(self):
+        """Standard case: single request finishes."""
+        crawler = MagicMock()
+        crawler.active_requests = 1
+
+        await _release_crawler(crawler)
+        assert crawler.active_requests == 0
+
+    @pytest.mark.asyncio
+    async def test_release_handles_missing_attribute(self):
+        """Should not crash if crawler has no active_requests attribute."""
+        crawler = MagicMock(spec=[])  # no attributes at all, so hasattr(crawler, "active_requests") is False
+        # Should not raise; the test passes as long as no exception propagates
+        await _release_crawler(crawler)
+
+    @pytest.mark.asyncio
+    async def test_multiple_releases_decrement_correctly(self):
+        """Multiple sequential releases should each decrement by 1."""
+        crawler = MagicMock()
+        crawler.active_requests = 5
+
+        for expected in [4, 3, 2, 1, 0, 0]:  # last one should floor at 0
+            await _release_crawler(crawler)
+            assert crawler.active_requests == expected
+
+    @pytest.mark.asyncio
+    async def test_concurrent_releases_are_safe(self):
+        """Concurrent releases should not corrupt the counter."""
+        crawler = MagicMock()
+        crawler.active_requests = 100
+        lock = asyncio.Lock()  # dedicated lock created inside this test instead of the module-level default
+
+        async def release_n_times(n):
+            for _ in range(n):
+                await _release_crawler(crawler, lock=lock)  # every task shares the same lock
+
+        # 10 concurrent tasks each releasing 10 times = 100 total
+        tasks = [asyncio.create_task(release_n_times(10)) for _ in range(10)]
+        await asyncio.gather(*tasks)
+
+        assert crawler.active_requests == 0  # 100 initial minus 100 releases
+
+
+class TestActiveRequestsTracking:  # the "get" side is simulated by assigning the counter directly on a mock
+    """Tests for the get/release lifecycle pattern."""
+
+    @pytest.mark.asyncio
+    async def test_get_sets_active_requests(self):
+        """Simulated get_crawler should set active_requests to 1 for new crawlers."""
+        crawler = MagicMock()
+        # Simulate what get_crawler does for a new browser
+        crawler.active_requests = 1
+
+        assert crawler.active_requests == 1  # NOTE(review): only re-reads the value assigned above; no pool code is called
+
+    @pytest.mark.asyncio
+    async def test_get_increments_existing(self):
+        """Simulated get_crawler should increment for existing pooled crawlers."""
+        crawler = MagicMock()
+        crawler.active_requests = 2
+
+        # Simulate another get_crawler call returning same browser
+        crawler.active_requests += 1
+        assert crawler.active_requests == 3  # NOTE(review): checks the arithmetic done in this test, not pool code
+
+    @pytest.mark.asyncio
+    async def test_full_get_release_lifecycle(self):
+        """Full lifecycle: get -> use -> release -> get -> release."""
+        crawler = MagicMock()
+
+        # First request gets the crawler
+        crawler.active_requests = 1
+
+        # Second concurrent request gets same crawler
+        crawler.active_requests += 1
+        assert crawler.active_requests == 2
+
+        # First request finishes
+        await _release_crawler(crawler)
+        assert crawler.active_requests == 1
+
+        # Second request finishes
+        await _release_crawler(crawler)
+        assert crawler.active_requests == 0
+
+    @pytest.mark.asyncio
+    async def test_janitor_safety_check(self):
+        """Janitor should only close browsers with active_requests == 0."""
+        crawler = MagicMock()
+        crawler.active_requests = 1
+
+        # Janitor check: should NOT close (the condition is written inline here, not imported from the pool)
+        should_close = getattr(crawler, "active_requests", 0) == 0  # a missing counter would be read as 0
+        assert should_close is False
+
+        # Request finishes
+        await _release_crawler(crawler)
+
+        # Janitor check: now safe to close
+        should_close = getattr(crawler, "active_requests", 0) == 0
+        assert should_close is True
--- a/tests/unit/test_resource_filtering_config.py
+++ b/tests/unit/test_resource_filtering_config.py
@@ -0,0 +1,100 @@
+"""Unit tests for BrowserConfig avoid_ads / avoid_css flags.
+
+Tests the config plumbing: defaults, serialization, cloning, roundtrips.
+No browser or network required.
+"""
+
+import pytest
+from crawl4ai.async_configs import BrowserConfig
+
+
+@pytest.fixture(autouse=True)  # autouse: applied to every test in this module without being requested
+def _reset_defaults():
+    """Ensure clean slate for each test."""
+    BrowserConfig.reset_defaults()  # setup: runs before the test body
+    yield  # the test body runs at this point
+    BrowserConfig.reset_defaults()  # teardown: runs after the test body
+
+
+class TestResourceFilteringDefaults:
+    """Both flags must default to False (opt-in only)."""
+
+    def test_default_values_are_false(self):
+        config = BrowserConfig()  # no arguments: exercises the constructor defaults
+        assert config.avoid_ads is False  # identity check: must be the bool False, not just falsy
+        assert config.avoid_css is False
+
+    def test_custom_values(self):
+        config = BrowserConfig(avoid_ads=True, avoid_css=True)  # both flags switched on explicitly
+        assert config.avoid_ads is True
+        assert config.avoid_css is True
+
+    def test_mixed_values(self):
+        c1 = BrowserConfig(avoid_ads=True, avoid_css=False)  # ads only
+        assert c1.avoid_ads is True
+        assert c1.avoid_css is False
+
+        c2 = BrowserConfig(avoid_ads=False, avoid_css=True)  # css only: the flags are stored independently
+        assert c2.avoid_ads is False
+        assert c2.avoid_css is True
+
+
+class TestResourceFilteringSerialization:
+    """Flags must survive to_dict / from_kwargs / dump / load roundtrips."""
+
+    def test_to_dict_includes_flags(self):
+        config = BrowserConfig(avoid_ads=True, avoid_css=True)
+        d = config.to_dict()
+        assert "avoid_ads" in d  # key presence is checked separately from the value
+        assert "avoid_css" in d
+        assert d["avoid_ads"] is True
+        assert d["avoid_css"] is True
+
+    def test_to_dict_includes_false_values(self):
+        config = BrowserConfig()  # defaults only
+        d = config.to_dict()
+        assert d["avoid_ads"] is False  # a KeyError here would mean the key was left out of the dict
+        assert d["avoid_css"] is False
+
+    def test_from_kwargs_roundtrip(self):
+        original = BrowserConfig(avoid_ads=True, avoid_css=False)  # mixed values so a swap of the two flags would be caught
+        d = original.to_dict()
+        restored = BrowserConfig.from_kwargs(d)  # rebuild from the dict produced by to_dict()
+        assert restored.avoid_ads is True
+        assert restored.avoid_css is False
+
+    def test_from_kwargs_with_true_values(self):
+        restored = BrowserConfig.from_kwargs({"avoid_ads": True, "avoid_css": True})  # dict containing only the two flags
+        assert restored.avoid_ads is True
+        assert restored.avoid_css is True
+
+    def test_dump_load_roundtrip(self):
+        original = BrowserConfig(avoid_ads=True, avoid_css=True)
+        dumped = original.dump()
+        restored = BrowserConfig.load(dumped)  # load() is fed exactly what dump() returned
+        assert restored.avoid_ads is True
+        assert restored.avoid_css is True
+
+
+class TestResourceFilteringClone:
+    """clone() must preserve flags and allow overrides."""
+
+    def test_clone_preserves_flags(self):
+        config = BrowserConfig(avoid_ads=True, avoid_css=True)
+        cloned = config.clone()  # no overrides passed
+        assert cloned.avoid_ads is True
+        assert cloned.avoid_css is True
+
+    def test_clone_allows_override(self):
+        config = BrowserConfig(avoid_ads=True, avoid_css=False)
+        cloned = config.clone(avoid_css=True)  # override one flag; the other should carry over
+        assert cloned.avoid_ads is True
+        assert cloned.avoid_css is True
+        # original unchanged: clone() must not write the override back to the source config
+        assert config.avoid_css is False
+
+    def test_clone_can_disable_flag(self):
+        config = BrowserConfig(avoid_ads=True, avoid_css=True)
+        cloned = config.clone(avoid_ads=False)  # overriding with False must also take effect
+        assert cloned.avoid_ads is False
+        assert cloned.avoid_css is True