From 06f78f2d03c7e68ed1cdc0ebf3c7e958d5812cea Mon Sep 17 00:00:00 2001
From: Max Bohomolov <moriturus7@gmail.com>
Date: Sun, 14 Jun 2026 17:42:35 +0000
Subject: [PATCH 1/3] Add `AiCrawler` with AI-powered HTML extraction

---
 pyproject.toml                                |   3 +-
 src/crawlee/crawlers/__init__.py              |  42 ++
 src/crawlee/crawlers/_ai/__init__.py          |  42 ++
 src/crawlee/crawlers/_ai/_ai_crawler.py       | 173 +++++
 .../crawlers/_ai/_ai_crawling_context.py      |  44 ++
 src/crawlee/crawlers/_ai/_base_distiller.py   |  66 ++
 src/crawlee/crawlers/_ai/_base_extractor.py   | 113 ++++
 .../crawlers/_ai/_clean_html_distiller.py     | 260 +++++++
 src/crawlee/crawlers/_ai/_direct_extractor.py | 144 ++++
 src/crawlee/crawlers/_ai/_prompts.py          |  47 ++
 .../crawlers/_ai/_selector_extractor.py       | 633 ++++++++++++++++++
 .../crawlers/_ai/_skeleton_distiller.py       | 216 ++++++
 src/crawlee/crawlers/_ai/_types.py            | 132 ++++
 src/crawlee/crawlers/_ai/_utils.py            |  28 +
 uv.lock                                       | 455 ++++++++++++-
 15 files changed, 2395 insertions(+), 3 deletions(-)
 create mode 100644 src/crawlee/crawlers/_ai/__init__.py
 create mode 100644 src/crawlee/crawlers/_ai/_ai_crawler.py
 create mode 100644 src/crawlee/crawlers/_ai/_ai_crawling_context.py
 create mode 100644 src/crawlee/crawlers/_ai/_base_distiller.py
 create mode 100644 src/crawlee/crawlers/_ai/_base_extractor.py
 create mode 100644 src/crawlee/crawlers/_ai/_clean_html_distiller.py
 create mode 100644 src/crawlee/crawlers/_ai/_direct_extractor.py
 create mode 100644 src/crawlee/crawlers/_ai/_prompts.py
 create mode 100644 src/crawlee/crawlers/_ai/_selector_extractor.py
 create mode 100644 src/crawlee/crawlers/_ai/_skeleton_distiller.py
 create mode 100644 src/crawlee/crawlers/_ai/_types.py
 create mode 100644 src/crawlee/crawlers/_ai/_utils.py

diff --git a/pyproject.toml b/pyproject.toml
index 811622d742..16c43e1df4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -50,7 +50,7 @@ dependencies = [
 ]
 
 [project.optional-dependencies]
-all = ["crawlee[adaptive-crawler,beautifulsoup,cli,curl-impersonate,httpx,parsel,playwright,otel,sql_sqlite,sql_postgres,sql_mysql,stagehand,redis]"]
+all = ["crawlee[adaptive-crawler,ai,beautifulsoup,cli,curl-impersonate,httpx,parsel,playwright,otel,sql_sqlite,sql_postgres,sql_mysql,stagehand,redis]"]
 adaptive-crawler = [
     "jaro-winkler>=2.0.3",
     "playwright>=1.27.0",
@@ -58,6 +58,7 @@ adaptive-crawler = [
     "apify_fingerprint_datapoints>=0.0.3",
     "browserforge>=1.2.4"
 ]
+ai = ["pydantic-ai-slim[openai]>=1.106.0", "parsel>=1.10.0", "lxml[html_clean]>=5.2.0"]
 beautifulsoup = ["beautifulsoup4[lxml]>=4.12.0", "html5lib>=1.0"]
 cli = ["cookiecutter>=2.6.0", "inquirer>=3.3.0", "rich>=13.9.0", "typer>=0.12.0"]
 curl-impersonate = ["curl-cffi>=0.9.0"]
diff --git a/src/crawlee/crawlers/__init__.py b/src/crawlee/crawlers/__init__.py
index ac97581bb0..2e67efa985 100644
--- a/src/crawlee/crawlers/__init__.py
+++ b/src/crawlee/crawlers/__init__.py
@@ -65,6 +65,36 @@
         StagehandPreNavCrawlingContext,
     )
 
+with _try_import(
+    __name__,
+    'AiCleanHtmlDistiller',
+    'AiCrawler',
+    'AiCrawlingContext',
+    'AiDirectExtractor',
+    'AiHtmlDistiller',
+    'AiHtmlExtractor',
+    'AiSelectorExtractor',
+    'AiSkeletonDistiller',
+    'AiUsageStats',
+    'BaseAiHtmlDistiller',
+    'BaseAiHtmlExtractor',
+    'get_basic_ai_cleaner',
+):
+    from ._ai import (
+        AiCleanHtmlDistiller,
+        AiCrawler,
+        AiCrawlingContext,
+        AiDirectExtractor,
+        AiHtmlDistiller,
+        AiHtmlExtractor,
+        AiSelectorExtractor,
+        AiSkeletonDistiller,
+        AiUsageStats,
+        BaseAiHtmlDistiller,
+        BaseAiHtmlExtractor,
+        get_basic_ai_cleaner,
+    )
+
 
 __all__ = [
     'AbstractHttpCrawler',
@@ -74,6 +104,17 @@
     'AdaptivePlaywrightCrawlingContext',
     'AdaptivePlaywrightPostNavCrawlingContext',
     'AdaptivePlaywrightPreNavCrawlingContext',
+    'AiCleanHtmlDistiller',
+    'AiCrawler',
+    'AiCrawlingContext',
+    'AiDirectExtractor',
+    'AiHtmlDistiller',
+    'AiHtmlExtractor',
+    'AiSelectorExtractor',
+    'AiSkeletonDistiller',
+    'AiUsageStats',
+    'BaseAiHtmlDistiller',
+    'BaseAiHtmlExtractor',
     'BasicCrawler',
     'BasicCrawlerOptions',
     'BasicCrawlingContext',
@@ -99,4 +140,5 @@
     'StagehandCrawlingContext',
     'StagehandPostNavCrawlingContext',
     'StagehandPreNavCrawlingContext',
+    'get_basic_ai_cleaner',
 ]
diff --git a/src/crawlee/crawlers/_ai/__init__.py b/src/crawlee/crawlers/_ai/__init__.py
new file mode 100644
index 0000000000..90571efc04
--- /dev/null
+++ b/src/crawlee/crawlers/_ai/__init__.py
@@ -0,0 +1,42 @@
+from crawlee._utils.try_import import install_import_hook as _install_import_hook
+from crawlee._utils.try_import import try_import as _try_import
+
+_install_import_hook(__name__)
+
+# The following imports are wrapped in try_import to handle optional dependencies (the `ai` extra),
+# ensuring the module can still function even if these dependencies are missing.
+with _try_import(__name__, 'AiCrawler'):
+    from ._ai_crawler import AiCrawler
+with _try_import(__name__, 'AiCrawlingContext'):
+    from ._ai_crawling_context import AiCrawlingContext
+with _try_import(__name__, 'BaseAiHtmlExtractor'):
+    from ._base_extractor import BaseAiHtmlExtractor
+with _try_import(__name__, 'AiDirectExtractor'):
+    from ._direct_extractor import AiDirectExtractor
+with _try_import(__name__, 'AiSelectorExtractor'):
+    from ._selector_extractor import AiSelectorExtractor
+with _try_import(__name__, 'BaseAiHtmlDistiller'):
+    from ._base_distiller import BaseAiHtmlDistiller
+with _try_import(__name__, 'AiCleanHtmlDistiller'):
+    from ._clean_html_distiller import AiCleanHtmlDistiller
+with _try_import(__name__, 'AiSkeletonDistiller'):
+    from ._skeleton_distiller import AiSkeletonDistiller
+with _try_import(__name__, 'AiHtmlDistiller', 'AiHtmlExtractor', 'AiUsageStats'):
+    from ._types import AiHtmlDistiller, AiHtmlExtractor, AiUsageStats
+with _try_import(__name__, 'get_basic_ai_cleaner'):
+    from ._utils import get_basic_ai_cleaner
+
+__all__ = [
+    'AiCleanHtmlDistiller',
+    'AiCrawler',
+    'AiCrawlingContext',
+    'AiDirectExtractor',
+    'AiHtmlDistiller',
+    'AiHtmlExtractor',
+    'AiSelectorExtractor',
+    'AiSkeletonDistiller',
+    'AiUsageStats',
+    'BaseAiHtmlDistiller',
+    'BaseAiHtmlExtractor',
+    'get_basic_ai_cleaner',
+]
diff --git a/src/crawlee/crawlers/_ai/_ai_crawler.py b/src/crawlee/crawlers/_ai/_ai_crawler.py
new file mode 100644
index 0000000000..5c89d20e1f
--- /dev/null
+++ b/src/crawlee/crawlers/_ai/_ai_crawler.py
@@ -0,0 +1,173 @@
+from __future__ import annotations
+
+import warnings
+from contextlib import AbstractAsyncContextManager
+from logging import getLogger
+from typing import TYPE_CHECKING
+
+from parsel import Selector
+
+from crawlee._utils.docs import docs_group
+from crawlee.crawlers import AbstractHttpCrawler, HttpCrawlerOptions
+from crawlee.crawlers._parsel._parsel_crawling_context import ParselCrawlingContext
+from crawlee.crawlers._parsel._parsel_parser import ParselParser
+
+from ._ai_crawling_context import AiCrawlingContext
+from ._direct_extractor import AiDirectExtractor
+
+if TYPE_CHECKING:
+    from collections.abc import AsyncGenerator
+
+    from pydantic_ai.models import Model
+    from typing_extensions import Unpack
+
+    from crawlee import Request
+    from crawlee.crawlers._abstract_http import ParsedHttpCrawlingContext
+
+    from ._types import AiHtmlExtractor, AiUsageStats, ExtractFunction, TSchema
+
+
+logger = getLogger(__name__)
+
+
+@docs_group('Crawlers')
+class AiCrawler(AbstractHttpCrawler[AiCrawlingContext, Selector, Selector]):
+    """A web crawler that extracts structured data from pages using an AI model.
+
+    Builds on `AbstractHttpCrawler` and parses responses with Parsel, so the request handler has both the usual
+    Parsel `selector` and the AI-powered `extract` helper: pass a Pydantic model and get a validated instance back.
+
+    The model layer is Pydantic AI, so any provider it supports (OpenAI, Anthropic, Gemini, Ollama, ...) works
+    through the `model` argument. The default extractor is an `AiDirectExtractor`: each page is distilled and sent
+    to the model in one call. For cached CSS-selector extraction at near-zero LLM cost, pass an `AiSelectorExtractor`
+    through the `extractor` argument.
+
+    Warning:
+        This is an experimental crawler. Its public API may change in future versions.
+
+    ### Usage
+
+    ```python
+    from pydantic import BaseModel
+    from pydantic_ai.models.openai import OpenAIChatModel
+    from pydantic_ai.providers.openai import OpenAIProvider
+
+    from crawlee.crawlers import AiCrawler, AiCrawlingContext
+
+
+    class Article(BaseModel):
+        title: str
+        author: str | None
+
+
+    crawler = AiCrawler(model=OpenAIChatModel('gpt-5.4-nano', provider=OpenAIProvider(api_key='...')))
+
+
+    @crawler.router.default_handler
+    async def request_handler(context: AiCrawlingContext) -> None:
+        article = await context.extract(Article)
+        await context.push_data(article.model_dump())
+
+
+    await crawler.run(['https://crawlee.dev/'])
+    ```
+    """
+
+    def __init__(
+        self,
+        *,
+        model: str | Model | None = None,
+        extractor: AiHtmlExtractor | None = None,
+        **kwargs: Unpack[HttpCrawlerOptions[AiCrawlingContext]],
+    ) -> None:
+        """Initialize a new instance.
+
+        Args:
+            model: The model used for extraction, given to the default extractor (`AiDirectExtractor`). A
+                provider-prefixed name (e.g. `'openai:gpt-5.4-nano'`) or a Pydantic AI `Model` instance. When given
+                as a string, the provider reads credentials from its environment variable (e.g. `OPENAI_API_KEY`).
+                Pass a `Model` instance to supply them explicitly. Provide exactly one of `model` or `extractor`.
+            extractor: A pre-configured `AiHtmlExtractor`, for full control over the distiller, instructions,
+                caching, usage limits, and model fallback. Pass an `AiSelectorExtractor` here for cached-selector
+                extraction. Provide exactly one of `model` or `extractor`.
+            kwargs: Additional keyword arguments to pass to the underlying `AbstractHttpCrawler`.
+        """
+        if (model is None) == (extractor is None):
+            raise ValueError('Provide exactly one of `model` or `extractor`.')
+
+        if extractor is None and model is not None:
+            extractor = AiDirectExtractor(model)
+
+        if extractor is None:  # Identity check: a custom extractor may define a falsy `__bool__`/`__len__`.
+            raise ValueError('Extractor initialization failed; check the provided model or extractor configuration.')
+
+        # Warn once per crawler instance, at construction time, rather than on every request.
+        warnings.warn(
+            'The AiCrawler is experimental and its public API may change in future releases.',
+            category=UserWarning,
+            stacklevel=2,
+        )
+
+        self._ai_usage = extractor.ai_usage
+        self._extractor = extractor
+
+        async def final_step(
+            context: ParsedHttpCrawlingContext[Selector],
+        ) -> AsyncGenerator[AiCrawlingContext, None]:
+            """Enhance `ParsedHttpCrawlingContext[Selector]` with the `extract` helper and `ai_usage`."""
+            parsel_context = ParselCrawlingContext.from_parsed_http_crawling_context(context)
+            yield AiCrawlingContext.from_parsel_crawling_context(
+                parsel_context,
+                extract=self._create_extract_function(parsel_context.selector, parsel_context.request),
+                ai_usage=self._ai_usage,
+            )
+
+        kwargs['_context_pipeline'] = self._create_static_content_crawler_pipeline().compose(final_step)
+
+        # If the extractor is an async context manager, add it to the crawler's additional context managers so it's
+        # properly entered and exited around the crawl.
+        if isinstance(extractor, AbstractAsyncContextManager):
+            kwargs['_additional_context_managers'] = [
+                *kwargs.get('_additional_context_managers', []),
+                extractor,
+            ]
+        super().__init__(
+            parser=ParselParser(),
+            **kwargs,
+        )
+
+    @property
+    def extractor(self) -> AiHtmlExtractor:
+        """The extractor used to turn pages into structured data."""
+        return self._extractor
+
+    @property
+    def ai_usage(self) -> AiUsageStats:
+        """Accumulated token usage across extraction calls."""
+        return self._ai_usage
+
+    def _create_extract_function(self, selector: Selector, request: Request) -> ExtractFunction:
+        """Build an `extract` helper bound to the page's parsed tree.
+
+        When the caller omits `cache_tag`, it defaults to `request.label` so an `AiSelectorExtractor` buckets
+        selectors per route without extra wiring. An explicit `cache_tag` overrides this.
+        """
+
+        async def extract(
+            schema: type[TSchema],
+            *,
+            scope: str | None = None,
+            cache_tag: str | None = None,
+            additional_instructions: str | None = None,
+        ) -> TSchema:
+            # `AiHtmlExtractor.extract` accepts a Selector directly, so the already-parsed tree is handed over
+            # without a serialize round trip.
+            return await self._extractor.extract(
+                selector,
+                schema,
+                scope=scope,
+                cache_tag=cache_tag if cache_tag is not None else request.label,
+                additional_instructions=additional_instructions,
+            )
+
+        return extract
diff --git a/src/crawlee/crawlers/_ai/_ai_crawling_context.py b/src/crawlee/crawlers/_ai/_ai_crawling_context.py
new file mode 100644
index 0000000000..18377a6644
--- /dev/null
+++ b/src/crawlee/crawlers/_ai/_ai_crawling_context.py
@@ -0,0 +1,44 @@
+from __future__ import annotations
+
+from dataclasses import dataclass, fields
+from typing import TYPE_CHECKING
+
+from crawlee._utils.docs import docs_group
+from crawlee.crawlers._parsel._parsel_crawling_context import ParselCrawlingContext
+
+if TYPE_CHECKING:
+    from typing_extensions import Self
+
+    from ._types import AiUsageStats, ExtractFunction
+
+
+@dataclass(frozen=True)
+@docs_group('Crawling contexts')
+class AiCrawlingContext(ParselCrawlingContext):
+    """The crawling context used by the `AiCrawler`.
+
+    It extends `ParselCrawlingContext`, so the full Parsel `selector` (and `enqueue_links`) remain available
+    alongside the AI-powered `extract` helper. Handlers can mix cheap manual selectors with AI extraction on the
+    same page.
+    """
+
+    extract: ExtractFunction
+    """Extract a structured Pydantic model from the page using the configured AI extractor."""
+
+    ai_usage: AiUsageStats
+    """The cumulative token usage stats of the extractor across calls in this crawl."""
+
+    @classmethod
+    def from_parsel_crawling_context(
+        cls,
+        context: ParselCrawlingContext,
+        *,
+        extract: ExtractFunction,
+        ai_usage: AiUsageStats,
+    ) -> Self:
+        """Create a new context from an existing `ParselCrawlingContext`."""
+        return cls(
+            extract=extract,
+            ai_usage=ai_usage,
+            **{field.name: getattr(context, field.name) for field in fields(context)},
+        )
diff --git a/src/crawlee/crawlers/_ai/_base_distiller.py b/src/crawlee/crawlers/_ai/_base_distiller.py
new file mode 100644
index 0000000000..3567054167
--- /dev/null
+++ b/src/crawlee/crawlers/_ai/_base_distiller.py
@@ -0,0 +1,66 @@
+from __future__ import annotations
+
+import re
+from abc import ABC, abstractmethod
+from typing import TYPE_CHECKING
+
+from crawlee._utils.docs import docs_group
+
+if TYPE_CHECKING:
+    from lxml.html import HtmlElement
+
+
+# Placeholder tag used to hide JSON scripts from the cleaning pass. The cleaner removes `<script>` but leaves
+# unknown tags intact, so renaming protects position, attributes and content. The tag is restored afterwards.
+_JSON_SCRIPT_PROTECT_TAG = 'crawlee-json-script'
+
+# Matches any run of whitespace, used to collapse whitespace inside text nodes.
+_WHITESPACE_RE = re.compile(r'\s+')
+
+
+@docs_group('Other')
+class BaseAiHtmlDistiller(ABC):
+    """Base class for the built-in HTML distillers.
+
+    A distiller reduces raw HTML to a compact representation that an LLM can read cheaply. Subclasses implement
+    `distill`. The base stores the prompt notes and returns them from `get_prompt_notes`. Override
+    `get_prompt_notes` when the notes depend on several constructor arguments.
+
+    The public interface is the `AiHtmlDistiller` protocol. The concrete distillers are `AiCleanHtmlDistiller`
+    and `AiSkeletonDistiller`.
+    """
+
+    def __init__(self, *, prompt_notes: str | None = None) -> None:
+        """Initialize a new instance.
+
+        Args:
+            prompt_notes: Short description of the final representation. Appended to the LLM task instructions by
+                extractors. `None` means no notes are appended.
+        """
+        self._prompt_notes = prompt_notes
+
+    @abstractmethod
+    def distill(self, html: str) -> str:
+        """Convert raw HTML to a compact representation suitable for an LLM."""
+
+    def get_prompt_notes(self) -> str | None:
+        """Return the configured prompt notes, or `None` when not set."""
+        return self._prompt_notes
+
+    def _protect_json_scripts(self, tree: HtmlElement) -> None:
+        """Rename JSON-bearing `<script>` tags to protect them from the cleaner."""
+        for elem in tree.iter('script'):
+            if self._is_json_script(elem):
+                elem.tag = _JSON_SCRIPT_PROTECT_TAG
+
+    def _unprotect_json_scripts(self, tree: HtmlElement) -> None:
+        """Restore the original tag name to JSON-bearing scripts after cleaning."""
+        for elem in tree.iter(_JSON_SCRIPT_PROTECT_TAG):
+            elem.tag = 'script'
+
+    def _is_json_script(self, element: HtmlElement) -> bool:
+        """Check if the element is a `<script>` carrying a JSON payload."""
+        if element.tag not in ('script', _JSON_SCRIPT_PROTECT_TAG):
+            return False
+        type_attr = (element.get('type') or '').lower()
+        return type_attr == 'application/json' or type_attr.endswith('+json')
diff --git a/src/crawlee/crawlers/_ai/_base_extractor.py b/src/crawlee/crawlers/_ai/_base_extractor.py
new file mode 100644
index 0000000000..002dc13a2a
--- /dev/null
+++ b/src/crawlee/crawlers/_ai/_base_extractor.py
@@ -0,0 +1,113 @@
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from typing import TYPE_CHECKING
+
+from parsel import Selector
+from pydantic_ai.models import infer_model
+
+from crawlee._utils.docs import docs_group
+
+from ._types import AiUsageStats
+
+if TYPE_CHECKING:
+    from pydantic_ai.models import Model
+    from pydantic_ai.usage import UsageLimits
+
+    from ._types import AiHtmlDistiller, TSchema
+
+
+@docs_group('Other')
+class BaseAiHtmlExtractor(ABC):
+    """Base class for the built-in HTML extractors.
+
+    An HTML extractor turns a page into a validated Pydantic model with the help of an LLM. This abstract base
+    implements the parts the built-in extractors share: resolving the model, composing the task instructions with
+    the distiller's prompt notes, and accumulating token usage.
+
+    The public interface is the `AiHtmlExtractor` protocol. The concrete extractors are `AiDirectExtractor` and
+    `AiSelectorExtractor`.
+    """
+
+    def __init__(
+        self,
+        model: str | Model,
+        *,
+        distiller: AiHtmlDistiller,
+        instructions: str,
+        usage_limits: UsageLimits | None,
+    ) -> None:
+        """Initialize a new instance.
+
+        Args:
+            model: A provider-prefixed name (e.g. `'openai:gpt-5.4-nano'`) or a pydantic-ai `Model`. Credentials are
+                read from the provider's environment variable (e.g. `OPENAI_API_KEY`) or passed explicitly through a
+                `Model` instance.
+            distiller: The HTML distiller shaping the LLM input.
+            instructions: Base task instructions. The distiller's prompt notes are appended automatically.
+            usage_limits: Optional pydantic-ai `UsageLimits` applied to every single run.
+        """
+        self._model = infer_model(model)
+        self._distiller = distiller
+        self._base_instructions = self._compose_instructions(instructions, distiller)
+        self._usage_limits = usage_limits
+        self._ai_usage = AiUsageStats()
+
+    @property
+    def ai_usage(self) -> AiUsageStats:
+        """Accumulated token usage of this extractor's runs."""
+        return self._ai_usage
+
+    def set_ai_usage(self, value: AiUsageStats) -> None:
+        """Replace the usage accumulator with `value`.
+
+        Lets an external owner share one accumulator across a delegation chain.
+
+        Args:
+            value: The accumulator to adopt.
+        """
+        self._ai_usage = value
+
+    @abstractmethod
+    async def extract(
+        self,
+        content: str | Selector,
+        schema: type[TSchema],
+        *,
+        scope: str | None = None,
+        cache_tag: str | None = None,
+        additional_instructions: str | None = None,
+    ) -> TSchema:
+        """Extract a structured instance of `schema` from `content`."""
+
+    @staticmethod
+    def _compose_instructions(instructions: str, distiller: AiHtmlDistiller) -> str:
+        """Append the distiller's input-format notes to the task instructions.
+
+        Args:
+            instructions: The base task instructions.
+            distiller: The distiller producing the LLM input.
+        """
+        notes = distiller.get_prompt_notes()
+        return f'{instructions}\n\n{notes}' if notes else instructions
+
+    @staticmethod
+    def _resolve_scope(selector: Selector, scope: str) -> Selector:
+        """Return the first subtree matching `scope`, or raise.
+
+        Args:
+            selector: The Parsel selector to query.
+            scope: A CSS selector identifying the region of interest.
+
+        Raises:
+            ValueError: When the scope matches nothing on the page.
+        """
+        scoped = selector.css(scope)
+        if not scoped:
+            raise ValueError(f'Extraction scope {scope!r} matched nothing on the page.')
+        return scoped[0]
+
+    @staticmethod
+    def _as_selector(content: str | Selector) -> Selector:
+        """Wrap a raw HTML string in a `Selector`, or return the input unchanged."""
+        return content if isinstance(content, Selector) else Selector(text=content)
diff --git a/src/crawlee/crawlers/_ai/_clean_html_distiller.py b/src/crawlee/crawlers/_ai/_clean_html_distiller.py
new file mode 100644
index 0000000000..644a6c12e7
--- /dev/null
+++ b/src/crawlee/crawlers/_ai/_clean_html_distiller.py
@@ -0,0 +1,260 @@
+from __future__ import annotations
+
+from logging import getLogger
+from typing import TYPE_CHECKING
+
+import lxml.html
+from lxml import etree  # ty: ignore[unresolved-import]
+from typing_extensions import override
+
+from crawlee._utils.docs import docs_group
+
+from ._base_distiller import _WHITESPACE_RE, BaseAiHtmlDistiller
+from ._prompts import _CLEAN_HTML_PROMPT_NOTES, _TRUNCATION_MARKER
+from ._utils import get_basic_ai_cleaner
+
+if TYPE_CHECKING:
+    from lxml.html import HtmlElement
+    from lxml_html_clean import Cleaner
+
+
+# Attributes that carry selector targets or semantic meaning for an LLM.
+_SEMANTIC_ATTRS = frozenset(
+    {
+        'class',
+        'id',
+        'itemprop',
+        'itemtype',
+        'href',
+        'src',
+        'alt',
+        'title',
+        'name',
+        'property',
+        'content',
+        'datetime',
+        'role',
+        'type',
+        'value',
+        'placeholder',
+        'aria-label',
+        'lang',
+        'for',
+    }
+)
+
+logger = getLogger(__name__)
+
+
+@docs_group('Other')
+class AiCleanHtmlDistiller(BaseAiHtmlDistiller):
+    """Distiller that produces cleaned, structure-preserving HTML for direct LLM extraction.
+
+    The full page text survives, so the data to extract lives inside the produced document. Tags, nesting, and
+    semantic attributes (`class`, `itemprop`, `datetime`) are kept so the model can tell fields apart.
+
+    JSON scripts are kept in full by default. For sites where a JSON-LD or framework blob is itself the data, this
+    is the cheapest path. Such blobs can reach hundreds of kilobytes, so set `max_json_len` for them.
+
+    This is the default distiller for `AiDirectExtractor`. See `AiSkeletonDistiller` for the selector-generation
+    variant.
+
+    ### Usage
+
+    ```python
+    from crawlee.crawlers import AiCleanHtmlDistiller
+
+    distiller = AiCleanHtmlDistiller(max_json_len=5_000)
+    distilled_html = distiller.distill('<html>...</html>')
+    ```
+    """
+
+    def __init__(
+        self,
+        *,
+        cleaner: Cleaner | None = None,
+        max_classes: int = 5,
+        max_attr_len: int = 300,
+        max_json_len: int | None = None,
+        keep_head: bool = True,
+        max_size: int | None = 400_000,
+        pretty: bool = False,
+        prompt_notes: str | None = _CLEAN_HTML_PROMPT_NOTES,
+    ) -> None:
+        """Initialize a new instance.
+
+        Args:
+            cleaner: A custom `lxml_html_clean.Cleaner`.
+            max_classes: How many class tokens to keep per element.
+            max_attr_len: Cap on attribute value length, in characters.
+            max_json_len: Cap on JSON payload length, or `None` to keep in full.
+            keep_head: Whether to keep a reduced `<head>` containing `<title>`, semantic `<meta>` and JSON scripts.
+            max_size: Hard cap on the distilled document, in characters. When breached, the tail is dropped and
+                replaced with the truncation marker.
+            pretty: Whether to pretty-print the serialized HTML.
+            prompt_notes: Override for the default prompt notes. Pass `None` to send no notes to the LLM.
+        """
+        super().__init__(prompt_notes=prompt_notes)
+        self._cleaner = cleaner or get_basic_ai_cleaner()
+        self._max_classes = max_classes
+        self._max_attr_len = max_attr_len
+        self._max_json_len = max_json_len
+        self._keep_head = keep_head
+        self._max_size = max_size
+        self._pretty = pretty
+
+    @override
+    def distill(self, html: str) -> str:
+        """Convert raw HTML to the cleaned, structure-preserving representation.
+
+        Args:
+            html: The raw HTML markup.
+        """
+        if not html or not html.strip():
+            return ''
+
+        tree = self._parse_and_clean(html)
+        self._reduce(tree)
+        distilled_html = self._serialize(tree)
+
+        return self._enforce_max_size(distilled_html, html)
+
+    def _parse_and_clean(self, html: str) -> HtmlElement:
+        """Parse raw HTML and run the cleaning stage in place.
+
+        Args:
+            html: The raw HTML markup.
+        """
+        tree = lxml.html.fromstring(html)
+
+        self._protect_json_scripts(tree)
+
+        self._cleaner(tree)
+
+        self._unprotect_json_scripts(tree)
+
+        return tree
+
+    def _reduce(self, tree: HtmlElement) -> None:
+        """Apply reduction passes to the cleaned tree in place.
+
+        Args:
+            tree: The cleaned lxml tree.
+        """
+        self._reduce_head(tree)
+        self._filter_attributes(tree)
+        self._truncate_json_scripts(tree)
+        self._normalize_text(tree)
+
+    def _reduce_head(self, tree: HtmlElement) -> None:
+        """Reduce `<head>` to its useful children, or drop it entirely.
+
+        Args:
+            tree: The lxml tree.
+        """
+        head = tree.find('head')
+        if head is None:
+            return
+
+        if not self._keep_head:
+            head.getparent().remove(head)
+            return
+
+        for child in list(head):  # Snapshot the children: `head` is mutated inside the loop.
+            keep_child = (
+                child.tag == 'title'
+                or self._is_json_script(child)
+                # meta with `name` or `property` carries structured data, everything else is noise.
+                or (child.tag == 'meta' and (child.get('name') or child.get('property')))
+            )
+            if not keep_child:
+                head.remove(child)
+
+    def _filter_attributes(self, tree: HtmlElement) -> None:
+        """Drop attributes outside the semantic allowlist, truncate long values.
+
+        Args:
+            tree: The lxml tree.
+        """
+        for elem in tree.iter():
+            if not isinstance(elem.tag, str):
+                continue
+
+            for name in list(elem.attrib):
+                if name in _SEMANTIC_ATTRS or name.startswith(('data-', 'aria-')):
+                    value = elem.attrib[name]
+
+                    # Inline `data:` URIs (base64 images and the like) are pure noise for an LLM.
+                    if value.lstrip().lower().startswith('data:'):
+                        del elem.attrib[name]
+                        continue
+
+                    if name == 'class':
+                        kept_classes = ' '.join(value.split()[: self._max_classes])
+                        if kept_classes:
+                            elem.attrib[name] = kept_classes
+                        else:
+                            del elem.attrib[name]
+                    elif len(value) > self._max_attr_len:
+                        elem.attrib[name] = value[: self._max_attr_len]
+                else:
+                    del elem.attrib[name]
+
+    def _truncate_json_scripts(self, tree: HtmlElement) -> None:
+        """Cap JSON script payloads to `max_json_len` characters, when set.
+
+        Args:
+            tree: The lxml tree.
+        """
+        if self._max_json_len is None:
+            return
+        for elem in tree.iter('script'):
+            if self._is_json_script(elem) and elem.text and len(elem.text) > self._max_json_len:
+                # Truncated JSON is invalid JSON, but the LLM only needs to see the key structure to anchor
+                # selectors or read top-level fields.
+                elem.text = elem.text[: self._max_json_len] + _TRUNCATION_MARKER
+
+    def _normalize_text(self, tree: HtmlElement) -> None:
+        """Collapse whitespace runs in text and tail content.
+
+        Args:
+            tree: The lxml tree.
+        """
+        for elem in tree.iter():
+            if not isinstance(elem.tag, str):
+                continue
+
+            if elem.text and not self._is_json_script(elem):
+                elem.text = _WHITESPACE_RE.sub(' ', elem.text)
+            if elem.tail:
+                elem.tail = _WHITESPACE_RE.sub(' ', elem.tail)
+
+    def _serialize(self, tree: HtmlElement) -> str:
+        """Serialize the lxml tree to an HTML string.
+
+        Args:
+            tree: The lxml tree.
+        """
+        return etree.tostring(tree, encoding='unicode', pretty_print=self._pretty)
+
+    def _enforce_max_size(
+        self,
+        distilled_html: str,
+        html: str,  # noqa: ARG002 exposed for subclasses that prefer re-distillation
+    ) -> str:
+        """Apply the size budget by cutting the tail and appending the marker.
+
+        Args:
+            distilled_html: The distilled output to size-check.
+            html: The original markup.
+        """
+        if self._max_size is not None and len(distilled_html) > self._max_size:
+            # No safe way to cut HTML mid-stream without breaking the structure.
+            logger.warning(
+                f'{type(self).__name__} output exceeds max_size ({len(distilled_html)} > {self._max_size}). '
+                'The tail of the page is cut off and invisible to the LLM. '
+                'Raise `max_size`, `scope` the extraction, or set `max_json_len`.'
+            )
+            return distilled_html[: self._max_size] + _TRUNCATION_MARKER
+
+        return distilled_html
diff --git a/src/crawlee/crawlers/_ai/_direct_extractor.py b/src/crawlee/crawlers/_ai/_direct_extractor.py
new file mode 100644
index 0000000000..8bf16e7e16
--- /dev/null
+++ b/src/crawlee/crawlers/_ai/_direct_extractor.py
@@ -0,0 +1,144 @@
+from __future__ import annotations
+
+import asyncio
+from typing import TYPE_CHECKING, cast
+
+from parsel import Selector
+from pydantic_ai import Agent
+from pydantic_ai.usage import RunUsage
+
+from crawlee._utils.docs import docs_group
+
+from ._base_extractor import BaseAiHtmlExtractor
+from ._clean_html_distiller import AiCleanHtmlDistiller
+from ._prompts import _DIRECT_INSTRUCTIONS
+
+if TYPE_CHECKING:
+    from pydantic_ai.models import Model
+    from pydantic_ai.usage import UsageLimits
+
+    from ._types import AiHtmlDistiller, TSchema
+
+
+@docs_group('Other')
+class AiDirectExtractor(BaseAiHtmlExtractor):
+    """Extractor that asks the LLM to read the page and return the data directly.
+
+    The page is distilled to compact HTML and sent to the model in a single call. The user schema is the agent's
+    output type, so pydantic-ai validates the result and feeds invalid output back to the model. This is the
+    simplest extractor and works on any page, at the cost of one LLM call per page.
+
+    See the `AiHtmlExtractor` protocol for the common extractor interface, and `AiSelectorExtractor` for a variant
+    that learns reusable CSS selectors.
+
+    ### Usage
+
+    ```python
+    from pydantic import BaseModel
+    from pydantic_ai.models.openai import OpenAIChatModel
+    from pydantic_ai.providers.openai import OpenAIProvider
+
+    from crawlee.crawlers import AiDirectExtractor
+
+
+    class Product(BaseModel):
+        name: str
+        price: str | None
+
+
+    model = OpenAIChatModel('gpt-5.4-nano', provider=OpenAIProvider(api_key='...'))
+    extractor = AiDirectExtractor(model=model)
+    product = await extractor.extract('<html>...</html>', Product)
+    ```
+    """
+
+    def __init__(
+        self,
+        model: str | Model,
+        *,
+        distiller: AiHtmlDistiller | None = None,
+        instructions: str = _DIRECT_INSTRUCTIONS,
+        retries: int = 1,
+        usage_limits: UsageLimits | None = None,
+    ) -> None:
+        """Initialize a new instance.
+
+        Args:
+            model: A provider-prefixed name (e.g. `'openai:gpt-5.4-nano'`) or a pydantic-ai `Model`.
+            distiller: The HTML distiller shaping the LLM input. Defaults to `AiCleanHtmlDistiller`.
+            instructions: Base task instructions. The distiller's prompt notes are appended automatically.
+            retries: How many times the model may fix output that fails schema validation within one run (pydantic-ai
+                output retries).
+            usage_limits: Optional pydantic-ai `UsageLimits` applied to every single run.
+        """
+        super().__init__(
+            model,
+            distiller=distiller or AiCleanHtmlDistiller(),
+            instructions=instructions,
+            usage_limits=usage_limits,
+        )
+        self._retries = retries
+
+    async def extract(
+        self,
+        content: str | Selector,
+        schema: type[TSchema],
+        *,
+        scope: str | None = None,
+        cache_tag: str | None = None,  # noqa: ARG002 ignored in direct extraction (no caching)
+        additional_instructions: str | None = None,
+    ) -> TSchema:
+        """Distill `content`, send it to the model, and return a validated `schema`.
+
+        Args:
+            content: Raw HTML or a parsed Parsel `Selector`.
+            schema: The Pydantic model describing the desired output.
+            scope: Optional CSS selector restricting extraction to the first matching subtree.
+            cache_tag: Ignored in direct extraction.
+            additional_instructions: Extra instructions appended for this call only.
+        """
+        if scope is not None:
+            # Scope resolution requires a parsed tree. Serializing the matched subtree also keeps the distiller input
+            # minimal.
+            content = self._resolve_scope(self._as_selector(content), scope)
+        html = content.get() if isinstance(content, Selector) else content
+        return await self._run(html, schema, additional_instructions)
+
+    async def _run(
+        self,
+        html: str,
+        schema: type[TSchema],
+        additional_instructions: str | None,
+    ) -> TSchema:
+        distilled_html = await asyncio.to_thread(self._distiller.distill, html)
+
+        # `cast` restores the static type pinned at runtime by `output_type`.
+        agent: Agent[None, TSchema] = cast(
+            'Agent[None, TSchema]',
+            Agent(
+                self._model,
+                output_type=schema,
+                instructions=self._base_instructions,
+                retries=self._retries,
+            ),
+        )
+
+        # The task framing names the fields explicitly: the output tool schema alone is not enough for smaller
+        # models, which otherwise answer that no fields were requested or describe the page instead of extracting
+        # from it. Types and descriptions already reach the model through the output tool schema, so they are not
+        # repeated.
+        field_names = ', '.join(schema.model_fields)
+        prompt = f'Extract the following fields from the document below: {field_names}.\n\nDocument:\n{distilled_html}'
+
+        run_usage = RunUsage()
+        try:
+            result = await agent.run(
+                prompt,
+                instructions=additional_instructions,
+                usage_limits=self._usage_limits,
+                usage=run_usage,
+            )
+        finally:
+            self._ai_usage.add(run_usage)
+
+        return result.output
diff --git a/src/crawlee/crawlers/_ai/_prompts.py b/src/crawlee/crawlers/_ai/_prompts.py
new file mode 100644
index 0000000000..9525bcba3a
--- /dev/null
+++ b/src/crawlee/crawlers/_ai/_prompts.py
@@ -0,0 +1,47 @@
+# Suffix appended to values that were cut short. Distillers emit it and the prompt notes below explain it.
+_TRUNCATION_MARKER = '…'
+
+# Default `instructions` of `AiDirectExtractor`: rules for copying field values straight from the document.
+_DIRECT_INSTRUCTIONS = (
+    'You are a precise web data extraction engine. Extract the requested fields strictly from the provided '
+    'document. Follow these rules:\n'
+    '- A field value is content copied verbatim from the document - never a description, summary, or '
+    'commentary about the document or its elements.\n'
+    '- Add no formatting of your own: no bullets, no "Label: value" prefixes, no markdown.\n'
+    '- If a field spans several elements (e.g. paragraphs of an article body), join their text in document '
+    'order with newlines, adding nothing.\n'
+    '- Use only what is present in the document; never invent or infer missing values. Leave absent fields '
+    'empty (null for optional fields).\n'
+    '- With several similar items on the page, return one entry per item; do not merge them.\n'
+    '- Do not reformat, translate, or normalize values unless the field definition asks for it.\n'
+    '- Return URLs exactly as they appear in `href`/`src`, without resolving or rewriting.'
+)
+
+# Default `instructions` of `AiSelectorExtractor`: rules for the Parsel CSS selectors the model must produce.
+_SELECTOR_INSTRUCTIONS = (
+    'You are an expert in CSS selectors. Given an HTML document, produce one Parsel CSS selector per '
+    'requested field. Every leaf (data) selector MUST end with `::text` or `::attr(name)` so it yields a '
+    'value rather than an element. For list fields the selector must match every item on the page. Prefer '
+    'stable anchors - semantic tags, ids, meaningful classes, `itemprop` and `data-*` attributes - and never '
+    'use positional selectors such as `:nth-child` or ids of individual list items: pages vary between '
+    'requests. For a field that is a list of items, provide a container selector matching every item element '
+    '(no ::text/::attr on it) and sub-selectors for the item fields written RELATIVE to one item container, '
+    'each ending with `::text` or `::attr(name)`.'
+)
+
+# Default prompt notes of `AiCleanHtmlDistiller`: they describe its output format to the model.
+_CLEAN_HTML_PROMPT_NOTES = (
+    'The document is distilled HTML. Scripts and styling are removed; tags, nesting, and data-bearing '
+    'attributes (`href`, `src`, `id`, `class`, `data-*`, `aria-*`, `lang`, `datetime`, `content`) are '
+    'preserved. JSON payloads (`application/ld+json`, `application/json`) are kept and are a reliable source '
+    f'for the requested fields. Values ending with `{_TRUNCATION_MARKER}` are truncated.'
+)
+
+# Default prompt notes of `AiSkeletonDistiller`: they describe its output format to the model.
+_SKELETON_PROMPT_NOTES = (
+    'The document is a skeleton of an HTML page. Scripts and styling are removed; tags, nesting, and '
+    'data-bearing attributes are preserved; JSON payloads are truncated to their key structure. Text is '
+    'truncated to short samples, so rely on structure and attributes rather than on exact text content. Runs '
+    'of repeated siblings are collapsed with an HTML comment marker; selectors must match every such sibling '
+    f'on the full page. Values ending with `{_TRUNCATION_MARKER}` are truncated.'
+)
diff --git a/src/crawlee/crawlers/_ai/_selector_extractor.py b/src/crawlee/crawlers/_ai/_selector_extractor.py
new file mode 100644
index 0000000000..51668d83b1
--- /dev/null
+++ b/src/crawlee/crawlers/_ai/_selector_extractor.py
@@ -0,0 +1,633 @@
+from __future__ import annotations
+
+import asyncio
+import hashlib
+import json
+import types
+from collections import defaultdict
+from enum import Enum
+from logging import getLogger
+from typing import TYPE_CHECKING, Union, cast, get_args, get_origin
+
+from cssselect import SelectorError
+from pydantic import BaseModel, Field, ValidationError
+from pydantic_ai import Agent, ModelRetry
+from pydantic_ai.exceptions import UnexpectedModelBehavior
+from pydantic_ai.usage import RunUsage
+
+from crawlee._utils.docs import docs_group
+from crawlee._utils.recoverable_state import RecoverableState
+
+from ._base_extractor import BaseAiHtmlExtractor
+from ._prompts import _SELECTOR_INSTRUCTIONS
+from ._skeleton_distiller import AiSkeletonDistiller
+
+if TYPE_CHECKING:
+    from typing import Any
+
+    from parsel import Selector
+    from pydantic_ai.models import Model
+    from pydantic_ai.usage import UsageLimits
+
+    from ._types import AiHtmlDistiller, AiHtmlExtractor, AiUsageStats, TSchema
+
+logger = getLogger(__name__)
+
+
+class FieldSelector(BaseModel):
+    """One node of a selector map. It is a leaf selector or an item group. It mirrors the user schema shape."""
+
+    selector: str = Field(  # evaluated with Parsel `.css()`, relative to the enclosing item for sub-selectors
+        description=(  # NOTE(review): presumably sent to the model with the output schema - prompt text
+            'Parsel CSS selector. For data fields it must end with ::text or ::attr(...). '
+            'For item-group fields it is a container selector matching every item element, '
+            'without ::text or ::attr.'
+        )
+    )
+    fields: dict[str, FieldSelector] | None = Field(  # `None` marks a leaf; a dict marks an item group
+        default=None,
+        description=(
+            'Sub-selectors for the item fields. Set only for item-group fields. '
+            'Each one is written relative to one item container.'
+        ),
+    )
+
+
+class SelectorMap(BaseModel):
+    """LLM output for `AiSelectorExtractor`. A tree of Parsel CSS selectors that mirrors the user schema."""
+
+    selectors: dict[str, FieldSelector] = Field(  # keyed by schema field name; consumed by `_apply_fields`
+        description=(  # NOTE(review): presumably sent to the model with the output schema - prompt text
+            'Maps each schema field name to its selector. A leaf field maps to one selector. '
+            'An item-group field maps to a container selector with sub-selectors.'
+        )
+    )
+
+
+class SelectorCacheState(BaseModel):
+    """Persisted selector cache of one `AiSelectorExtractor`.
+
+    Each key is a `(schema, scope, cache_tag)` digest. Each value is the list
+    of selector maps learned for that bucket, one per markup variant.
+    """
+
+    selectors: dict[str, list[SelectorMap]] = Field(default_factory=dict)  # digest -> maps, last used first
+
+
+class _FieldKind(Enum):
+    """The selector-mapping shape of one schema field, as reported by `AiSelectorExtractor._classify_field`."""
+
+    LEAF = 'leaf'
+    """A scalar value extracted by one leaf selector."""
+
+    LIST_SCALAR = 'list_scalar'
+    """A list of scalars extracted by one leaf selector matching many nodes."""
+
+    LIST_MODEL = 'list_model'
+    """A list of items. Maps to a container selector plus relative sub-selectors."""
+
+    NESTED_MODEL = 'nested_model'
+    """A single nested model. Maps to a container selector plus relative sub-selectors."""
+
+    LIST_UNION = 'list_union'
+    """Unsupported: a list of a union type (a match cannot pick a member)."""
+
+    LIST_OF_LISTS = 'list_of_lists'
+    """Unsupported: a list nested inside a list."""
+
+    MAPPING = 'mapping'
+    """Unsupported: a `dict`-typed field."""
+
+    UNSUPPORTED = 'unsupported'
+    """Unsupported: any other parametrized annotation (tuple, set, ...)."""
+
+
+@docs_group('Other')
+class AiSelectorExtractor(BaseAiHtmlExtractor):
+    """Extractor that learns reusable CSS selectors and reuses them for free.
+
+    On each call it first tries the cached selector maps and extracts with no LLM call when one fits. On a miss it
+    asks the model for a new map, validates it against the live page, and caches it. A bucket keeps several maps,
+    so A/B-tested markup variants can coexist.
+
+    The cache is a `RecoverableState` persisted to a `KeyValueStore`. As an async context manager it loads at
+    startup and saves at shutdown. Used standalone, it initializes lazily.
+
+    With a `fallback` extractor, unsupported schemas and generation failures degrade to it. Infrastructure errors
+    such as credentials, HTTP, and usage limits propagate.
+
+    See the `AiHtmlExtractor` protocol for the common extractor interface, and `AiDirectExtractor` for a per-page
+    variant with no selector cache.
+
+    ### Usage
+
+    ```python
+    from pydantic import BaseModel
+    from pydantic_ai.models.openai import OpenAIChatModel
+    from pydantic_ai.providers.openai import OpenAIProvider
+
+    from crawlee.crawlers import AiDirectExtractor, AiSelectorExtractor
+
+
+    class Product(BaseModel):
+        name: str
+        price: str | None
+
+
+    model = OpenAIChatModel('gpt-5.4-nano', provider=OpenAIProvider(api_key='...'))
+    extractor = AiSelectorExtractor(model=model, fallback=AiDirectExtractor(model=model))
+    product = await extractor.extract('<html>...</html>', Product, cache_tag='product')
+    ```
+    """
+
+    _MAX_RENDER_DEPTH = 5
+    """Hard cap on `_format_fields` recursion depth."""
+
+    def __init__(
+        self,
+        model: str | Model,
+        *,
+        kvs_cache_key: str | None = None,
+        distiller: AiHtmlDistiller | None = None,
+        instructions: str = _SELECTOR_INSTRUCTIONS,
+        retries: int = 3,
+        max_variants: int = 5,
+        fallback: AiHtmlExtractor | None = None,
+        usage_limits: UsageLimits | None = None,
+        persistence: bool = True,
+    ) -> None:
+        """Initialize a new instance.
+
+        Args:
+            model: A provider-prefixed name (e.g. `'openai:gpt-5.4-nano'`) or a pydantic-ai `Model`.
+            kvs_cache_key: Name of the `KeyValueStore` record holding the selector cache. Defaults to `'AI-SELECTORS'`.
+            distiller: The HTML distiller shaping the LLM input. Defaults to `AiSkeletonDistiller`.
+            instructions: Base selector-generation instructions. The distiller's prompt notes are appended
+                automatically.
+            retries: How many times the model may fix failing selectors within one generation.
+            max_variants: Cap on cached selector maps per bucket.
+            fallback: Extractor to degrade to when generation fails or the schema shape is unsupported.
+            usage_limits: Optional pydantic-ai `UsageLimits` applied to every generation run.
+            persistence: Whether the selector cache is persisted. Disable for ephemeral runs or tests.
+        """
+        super().__init__(
+            model,
+            distiller=distiller or AiSkeletonDistiller(),
+            instructions=instructions,
+            usage_limits=usage_limits,
+        )
+        self._retries = retries
+        self._max_variants = max_variants
+        self._fallback = fallback
+        self._persistence = persistence
+        self._share_usage_with_fallback()
+
+        self._selector_cache: RecoverableState[SelectorCacheState] = RecoverableState(
+            default_state=SelectorCacheState(),
+            persist_state_key=kvs_cache_key or 'AI-SELECTORS',
+            persistence_enabled=persistence,
+            logger=logger,
+        )
+        self._locks: defaultdict[str, asyncio.Lock] = defaultdict(asyncio.Lock)
+        self._init_lock = asyncio.Lock()
+        self._active = False
+
+    def set_ai_usage(self, value: AiUsageStats) -> None:
+        """Adopt `value` through the base class, then hand the resulting `_ai_usage` to the fallback, if any."""
+        super().set_ai_usage(value)
+        self._share_usage_with_fallback()
+
+    def _share_usage_with_fallback(self) -> None:
+        """Make the fallback chain accumulate into this extractor's `ai_usage`."""
+        if self._fallback is not None:
+            self._fallback.set_ai_usage(self._ai_usage)
+
+    @property
+    def active(self) -> bool:
+        """Whether the extractor is between `__aenter__` and `__aexit__`, as tracked by `_active`."""
+        return self._active
+
+    async def __aenter__(self) -> AiSelectorExtractor:
+        """Activate the extractor and initialize the selector cache eagerly. Re-entering raises `RuntimeError`."""
+        if self._active:
+            raise RuntimeError(f'The {type(self).__name__} is already active.')
+        # The flag is set before the await below, so an overlapping second enter is rejected by the check above.
+        self._active = True
+        if not self._selector_cache.is_initialized:
+            await self._selector_cache.initialize()
+        return self
+
+    async def __aexit__(self, exc_type: object, exc_value: object, exc_traceback: object) -> None:
+        """Tear down the selector cache and mark the extractor inactive. Raises `RuntimeError` when not active."""
+        if not self._active:
+            raise RuntimeError(f'The {type(self).__name__} is not active.')
+        # NOTE(review): presumably `teardown` persists the cache a final time - confirm in `RecoverableState`.
+        await self._selector_cache.teardown()
+        self._active = False
+
+    async def extract(
+        self,
+        content: str | Selector,
+        schema: type[TSchema],
+        *,
+        scope: str | None = None,
+        cache_tag: str | None = None,
+        additional_instructions: str | None = None,
+    ) -> TSchema:
+        """Extract `schema` from `content`, trying cached selector maps before asking the model for a new one.
+
+        Args:
+            content: Raw HTML or a parsed Parsel `Selector`.
+            schema: The Pydantic model describing the desired output.
+            scope: Optional CSS selector restricting extraction to the first matching subtree.
+            cache_tag: Optional tag identifying the page kind. Selectors are cached per tag.
+            additional_instructions: Extra instructions appended for this call only.
+
+        Raises:
+            ValueError: When the schema shape is unsupported and no fallback is configured, or when `scope` matches
+                nothing.
+        """
+        selector = self._as_selector(content)
+
+        if scope is not None:
+            # Everything below runs against this subtree, so generated selectors cannot match content outside the scope.
+            selector = self._resolve_scope(selector, scope)
+
+        # Reject unsupported schema shapes before any cache or LLM work.
+        reason = self._unsupported_schema_reason(schema)
+        if reason is not None:
+            if self._fallback is not None:
+                logger.info(
+                    f'Schema {schema.__name__} is not supported by cached selectors ({reason}). '
+                    f'Delegating to the fallback extractor.'
+                )
+                return await self._delegate_to_fallback(selector, schema, additional_instructions)
+            raise ValueError(
+                f'AiSelectorExtractor does not support this schema shape: {reason}. '
+                'Configure a fallback extractor or use AiDirectExtractor for it.'
+            )
+
+        if not self._selector_cache.is_initialized:
+            async with self._init_lock:
+                if not self._selector_cache.is_initialized:
+                    # Lazy init for standalone use. Under `AiCrawler` the context manager initializes it at startup.
+                    await self._selector_cache.initialize()
+
+        cache_digest = self._build_cache_digest(schema, scope, cache_tag)
+        variants = self._selector_cache.current_value.selectors.setdefault(cache_digest, [])
+        # Fast path without the bucket lock: a cached map that fits costs no LLM call.
+        extracted = self._try_cached_variants(variants, selector, schema)
+        if extracted is not None:
+            return extracted
+
+        async with self._locks[cache_digest]:
+            # A concurrent miss may have generated selectors while we waited for the lock, so check the cache again.
+            extracted = self._try_cached_variants(variants, selector, schema)
+            if extracted is not None:
+                return extracted
+            # Only `UnexpectedModelBehavior` degrades to the fallback. Any other error propagates to the caller.
+            try:
+                selector_map = await self._generate_selectors(selector, schema, additional_instructions)
+            except UnexpectedModelBehavior:
+                if self._fallback is not None:
+                    return await self._delegate_to_fallback(selector, schema, additional_instructions)
+                raise
+
+            variants.insert(0, selector_map)
+            # `variants` is a live reference into the cached state, so trim it in place. Reassigning would shadow it
+            # instead of updating the cache.
+            del variants[self._max_variants :]
+
+            return self._apply_selectors(selector_map, selector, schema)
+
+    async def _delegate_to_fallback(
+        self,
+        selector: Selector,
+        schema: type[TSchema],
+        additional_instructions: str | None,
+    ) -> TSchema:
+        if self._fallback is None:
+            raise RuntimeError('Cannot delegate to a fallback extractor because none is configured.')
+        # The scope was already applied to `selector`, so the fallback gets the subtree and no scope.
+        return await self._fallback.extract(
+            selector,
+            schema,
+            additional_instructions=additional_instructions,
+        )
+
+    def _try_cached_variants(
+        self,
+        variants: list[SelectorMap],
+        selector: Selector,
+        schema: type[TSchema],
+    ) -> TSchema | None:
+        for index, selector_map in enumerate(variants):
+            try:
+                extracted = self._apply_selectors(selector_map, selector, schema)
+            except (ValidationError, ValueError, SelectorError):
+                continue  # belongs to a different template variant or is malformed
+            if index > 0:
+                # Move-to-front so this variant is tried first next time.
+                variants.insert(0, variants.pop(index))
+            return extracted
+        return None
+
+    async def _generate_selectors(
+        self,
+        selector: Selector,
+        schema: type[TSchema],
+        additional_instructions: str | None,
+    ) -> SelectorMap:
+        agent: Agent[None, SelectorMap] = cast(
+            'Agent[None, SelectorMap]',
+            Agent(
+                self._model,
+                output_type=SelectorMap,
+                instructions=self._base_instructions,
+                retries=self._retries,
+            ),
+        )
+
+        @agent.output_validator
+        def _validate(plan: SelectorMap) -> SelectorMap:
+            self._check_fields_covered(plan, schema)
+            self._check_selectors_compile_and_match(plan, selector)
+            self._check_apply_succeeds(plan, selector, schema)
+            return plan
+
+        skeleton = await asyncio.to_thread(self._distiller.distill, selector.get())
+        # The output type is `SelectorMap`, so the user schema never reaches the model through a tool. The prompt
+        # spells it out instead, paid once per markup variant.
+        prompt = f'Fields to extract:\n{self._format_fields(schema)}\n\nPage skeleton:\n{skeleton}'
+
+        run_usage = RunUsage()
+        try:
+            result = await agent.run(
+                prompt,
+                instructions=additional_instructions,
+                usage_limits=self._usage_limits,
+                usage=run_usage,
+            )
+        finally:
+            self._ai_usage.add(run_usage)
+
+        return result.output
+
+    @staticmethod
+    def _check_fields_covered(plan: SelectorMap, schema: type[BaseModel]) -> None:
+        """Raise `ModelRetry` when the plan misses required schema fields."""
+        missing = [name for name in schema.model_fields if name not in plan.selectors]
+        if missing:
+            raise ModelRetry(f'No selector provided for fields: {missing}')
+
+    @staticmethod
+    def _is_leaf_selector_form(selector: str) -> bool:
+        """Whether a selector targets a value (ends with `::text` or `::attr(...)`)."""
+        return selector.endswith('::text') or '::attr(' in selector
+
+    def _check_selectors_compile_and_match(self, plan: SelectorMap, selector: Selector) -> None:
+        """Raise `ModelRetry` on invalid CSS, wrong selector form, or no matches.
+
+        Form and syntax errors are reported at the first offending selector. Selectors that match nothing are
+        collected and reported together. Sub-selectors are checked one level deep, against the first matched item.
+        The strictness applies at generation time only: an empty match still validates (`[]`, `None`), so without
+        it a useless map would be accepted and cached. At apply time empty matches stay legal.
+        """
+        empty: list[str] = []
+        for name, field_selector in plan.selectors.items():
+            is_container = field_selector.fields is not None
+            is_leaf_form = self._is_leaf_selector_form(field_selector.selector)
+            # A container with ::text/::attr yields text nodes instead of elements, breaking `_apply_fields` on item
+            # groups. A leaf without that form yields whole elements instead of a value. Flag both so the model
+            # fixes the form, not a downstream symptom.
+            if is_container and is_leaf_form:
+                raise ModelRetry(
+                    f'The container selector for field {name!r} must not end with ::text or ::attr(...); '
+                    'that form is only for leaf fields. A container selects item ELEMENTS, and sub-selectors '
+                    'extract leaves relative to each item.'
+                )
+            if not is_container and not is_leaf_form:
+                raise ModelRetry(
+                    f'The selector for leaf field {name!r} must end with ::text or ::attr(...) so it yields a '
+                    'value, not an element. Append ::text for the element text or ::attr(name) for an attribute.'
+                )
+            try:
+                matched = selector.css(field_selector.selector)
+            except SelectorError as exc:
+                raise ModelRetry(
+                    f'The selector for field {name!r} is not valid Parsel CSS ({exc}). '
+                    'Use plain CSS ending with ::text or ::attr(...).'
+                ) from exc
+            if not matched:
+                empty.append(name)
+                continue
+            # Sub-selectors are tried on the first matched item only. That is cheap, and it turns a vague
+            # "field required" error into a targeted "this relative selector matches nothing in an item".
+            for sub_name, sub in (field_selector.fields or {}).items():
+                if not self._is_leaf_selector_form(sub.selector):
+                    raise ModelRetry(
+                        f'The selector for field {name!r}[].{sub_name!r} must end with ::text or ::attr(...); '
+                        'sub-selectors extract leaf values relative to one item container.'
+                    )
+                try:
+                    sub_matched = matched[0].css(sub.selector)
+                except SelectorError as exc:
+                    raise ModelRetry(
+                        f'The selector for field {name!r}[].{sub_name!r} is not valid Parsel CSS ({exc}). '
+                        'Use plain CSS ending with ::text or ::attr(...). '
+                        'Sub-selectors must be RELATIVE to one item container.'
+                    ) from exc
+                if not sub_matched:
+                    empty.append(f'{name}[].{sub_name}')
+        if empty:
+            raise ModelRetry(
+                f'Selectors matched no elements on this page for fields: {empty}. '
+                'Anchor them to elements that actually exist in the document.'
+            )
+
+    def _check_apply_succeeds(
+        self,
+        plan: SelectorMap,
+        selector: Selector,
+        schema: type[BaseModel],
+    ) -> None:
+        """Raise `ModelRetry` when applying the plan produces schema-invalid data."""
+        try:
+            self._apply_selectors(plan, selector, cast('type[TSchema]', schema))
+        except ValidationError as exc:
+            failures = '; '.join(f'{".".join(map(str, error["loc"]))}: {error["msg"]}' for error in exc.errors())
+            raise ModelRetry(
+                f'Applying the selectors to the live page produced invalid data: {failures}. '
+                'Adjust the failing selectors.'
+            ) from exc
+
+    def _apply_selectors(
+        self,
+        plan: SelectorMap,
+        selector: Selector,
+        schema: type[TSchema],
+    ) -> TSchema:
+        """Run `plan` against `selector` and build a validated `schema` instance."""
+        return schema.model_validate(self._apply_fields(plan.selectors, selector, schema))
+
+    def _apply_fields(
+        self,
+        fields: dict[str, FieldSelector],
+        scope: Selector,
+        schema: type[BaseModel],
+    ) -> dict[str, Any]:
+        """Apply one level of the selector tree relative to `scope`.
+
+        Item-group fields recurse. The container selector enumerates item elements. Sub-selectors run relative to
+        each item (native Parsel behavior of `element.css(...)`).
+        """
+        raw: dict[str, Any] = {}
+        for name, info in schema.model_fields.items():
+            field_selector = fields.get(name)
+            if field_selector is None:
+                continue
+            kind, inner = self._classify_field(info.annotation)
+
+            if kind is _FieldKind.LIST_MODEL and inner is not None:
+                raw[name] = [
+                    self._apply_fields(field_selector.fields or {}, element, inner)
+                    for element in scope.css(field_selector.selector)
+                ]
+            elif kind is _FieldKind.NESTED_MODEL and inner is not None:
+                matched = scope.css(field_selector.selector)
+                raw[name] = self._apply_fields(field_selector.fields or {}, matched[0], inner) if matched else None
+            elif kind is _FieldKind.LIST_SCALAR:
+                raw[name] = [value.strip() for value in scope.css(field_selector.selector).getall()]
+            else:
+                value = scope.css(field_selector.selector).get()
+                raw[name] = value.strip() if isinstance(value, str) else value
+        return raw
+
+    @staticmethod
+    def _build_cache_digest(schema: type[BaseModel], scope: str | None, cache_tag: str | None) -> str:
+        """Build the digest identifying a `(schema, scope, cache_tag)` bucket.
+
+        Scope and tag are part of the identity. The same schema extracted from a different region or page kind gets
+        its own selector bucket.
+        """
+        return hashlib.sha256(
+            json.dumps(schema.model_json_schema(), sort_keys=True).encode()
+            + (scope or '').encode()
+            + b'\x00'
+            + (cache_tag or '').encode()
+        ).hexdigest()[:16]
+
+    @staticmethod
+    def _unwrap_optional(annotation: Any) -> Any:
+        """Return `X` for an `X | None` annotation; any other annotation is returned unchanged."""
+        origin = get_origin(annotation)
+        if origin is not Union and origin is not types.UnionType:
+            return annotation
+        non_none = tuple(member for member in get_args(annotation) if member is not type(None))
+        # `A | B | None` keeps more than one member and stays a union.
+        return non_none[0] if len(non_none) == 1 else annotation
+
+    @staticmethod
+    def _is_union(annotation: Any) -> bool:
+        """Return whether `annotation` is a union; `X | None` counts too, so strip Optional wrappers first."""
+        origin = get_origin(annotation)
+        return origin is Union or origin is types.UnionType
+
+    def _classify_field(self, annotation: Any) -> tuple[_FieldKind, type[BaseModel] | None]:
+        """Classify a field annotation and return its kind plus the inner model for model-bearing kinds.
+
+        Shared by the capability gate, the prompt renderer and the selector applier. `X | None` wrappers are stripped
+        first, so `str | None` is a leaf and `list[Item] | None` is a list of models.
+
+        Args:
+            annotation: The raw field annotation, possibly `Optional`.
+        """
+        annotation = self._unwrap_optional(annotation)
+        origin = get_origin(annotation)
+
+        if origin is list:
+            args = get_args(annotation)
+            item = self._unwrap_optional(args[0]) if args else str
+            # `list[A | B]` is ambiguous: a match can't be tied to a specific union member, so treat it as unsupported.
+            if self._is_union(item):
+                return _FieldKind.LIST_UNION, None
+            if get_origin(item) is list:
+                return _FieldKind.LIST_OF_LISTS, None
+            # Rule out generic aliases first: on Python 3.10 they pass `isinstance(x, type)` and crash `issubclass`.
+            if get_origin(item) is None and isinstance(item, type) and issubclass(item, BaseModel):
+                return _FieldKind.LIST_MODEL, item
+            return _FieldKind.LIST_SCALAR, None
+        if origin is dict:
+            return _FieldKind.MAPPING, None
+        if origin is not None:
+            return _FieldKind.UNSUPPORTED, None
+        if isinstance(annotation, type) and issubclass(annotation, BaseModel):  # origin is None here
+            return _FieldKind.NESTED_MODEL, annotation
+        return _FieldKind.LEAF, None
+
+    def _unsupported_schema_reason(self, schema: type[BaseModel], *, depth: int = 0) -> str | None:
+        """Explain why cached selectors cannot serve `schema`, or return `None` when they can.
+
+        Unions inside lists, lists of lists, mappings and other parameterized annotations are rejected, and so is
+        a model nested inside another nested model or item list. Checking this before generation avoids spending
+        LLM retries on a schema that can never be mapped to selectors.
+
+        Args:
+            schema: The schema to check.
+            depth: Current recursion depth. `0` for the top-level schema, `1` inside a nested model or item list.
+        """
+        flat_reasons = {
+            _FieldKind.LIST_UNION: 'is a list of a union type',
+            _FieldKind.LIST_OF_LISTS: 'is a list of lists',
+            _FieldKind.MAPPING: 'is a mapping',
+        }
+        for field_name, field_info in schema.model_fields.items():
+            kind, nested = self._classify_field(field_info.annotation)
+            if kind in flat_reasons:
+                return f'field {field_name!r} {flat_reasons[kind]}'
+            if kind is _FieldKind.UNSUPPORTED:
+                return f'field {field_name!r} has an unsupported annotation {field_info.annotation!r}'
+            if nested is None or kind not in (_FieldKind.LIST_MODEL, _FieldKind.NESTED_MODEL):
+                continue
+            if depth >= 1:
+                what = 'item lists' if kind is _FieldKind.LIST_MODEL else 'models'
+                return f'field {field_name!r} nests {what} deeper than one level'
+            nested_reason = self._unsupported_schema_reason(nested, depth=depth + 1)
+            if nested_reason is not None:
+                return nested_reason
+        return None
+
+    def _format_fields(self, schema: type[BaseModel]) -> str:
+        """Render field names, types, item-group structure and descriptions as indented text for the prompt.
+
+        The selector agent's output type is `SelectorMap`, so the user schema is otherwise invisible to the model.
+
+        Args:
+            schema: The Pydantic model whose fields to render.
+
+        Raises:
+            ValueError: When recursion exceeds `_MAX_RENDER_DEPTH`.
+        """
+
+        def render(model: type[BaseModel], indent: int) -> list[str]:
+            if indent > self._MAX_RENDER_DEPTH:
+                raise ValueError(f'Schema rendering exceeded depth {self._MAX_RENDER_DEPTH}')
+            pad = '  ' * indent
+            lines: list[str] = []
+            for name, info in model.model_fields.items():
+                description = f': {info.description}' if info.description else ''
+                kind, inner = self._classify_field(info.annotation)
+                if kind is _FieldKind.LIST_MODEL and inner is not None:
+                    lines.append(f'{pad}- {name} (list of items, each with:){description}')
+                    lines.extend(render(inner, indent + 1))
+                elif kind is _FieldKind.NESTED_MODEL and inner is not None:
+                    lines.append(f'{pad}- {name} (item with:){description}')
+                    lines.extend(render(inner, indent + 1))
+                else:
+                    annotation = self._unwrap_optional(info.annotation)
+                    is_class = isinstance(annotation, type) and get_origin(annotation) is None  # not a generic alias
+                    type_name = annotation.__name__ if is_class else str(annotation)
+                    lines.append(f'{pad}- {name} ({type_name}){description}')
+            return lines
+
+        return '\n'.join(render(schema, 0))
diff --git a/src/crawlee/crawlers/_ai/_skeleton_distiller.py b/src/crawlee/crawlers/_ai/_skeleton_distiller.py
new file mode 100644
index 0000000000..f77b0bb11d
--- /dev/null
+++ b/src/crawlee/crawlers/_ai/_skeleton_distiller.py
@@ -0,0 +1,216 @@
+from __future__ import annotations
+
+from itertools import groupby
+from logging import getLogger
+from typing import TYPE_CHECKING
+
+from lxml import etree  # ty: ignore[unresolved-import]
+from typing_extensions import override
+
+from crawlee._utils.docs import docs_group
+
+from ._base_distiller import _WHITESPACE_RE
+from ._clean_html_distiller import AiCleanHtmlDistiller
+from ._prompts import _SKELETON_PROMPT_NOTES, _TRUNCATION_MARKER
+
+if TYPE_CHECKING:
+    from lxml.html import HtmlElement
+    from lxml_html_clean import Cleaner
+
+
+# Identity attributes: their values join the sibling signature, so elements that differ in one never share a run.
+_IDENTITY_ATTRS = ('name', 'property', 'itemprop', 'itemtype', 'role', 'type')
+
+# Tags that are never collapsed even when their signature repeats, since `<br>` and `<hr>` are layout markers.
+_NEVER_COLLAPSE_TAGS = frozenset({'br', 'hr'})
+
+logger = getLogger(__name__)
+
+
+@docs_group('Other')
+class AiSkeletonDistiller(AiCleanHtmlDistiller):
+    """Distiller that produces a DOM skeleton used to ask an LLM for CSS selectors.
+
+    The skeleton is built from the page by removing nodes, attributes, and class tokens, or by truncating text. It
+    never renames or re-parents elements. So any selector the LLM builds from the skeleton also matches the
+    original page.
+
+    This is the default distiller for `AiSelectorExtractor`. See `AiCleanHtmlDistiller` for the direct-extraction
+    variant that keeps the full page text.
+
+    On top of the base cleaning:
+
+    - text nodes are truncated to `max_text_len`, so the model sees samples
+      rather than full content.
+    - JSON payloads are capped at `max_json_len`, so only their key structure
+      reaches the model.
+    - runs of repeated siblings are collapsed to the first `keep_siblings`
+      items plus a comment marker. Siblings with a distinct identity attribute
+      (`name`, `property`, `itemprop`, ...) are kept, since a run of `<meta>`
+      tags is not a repeating template.
+    - if the result still exceeds `max_size`, it is re-distilled with tighter
+      settings. Cutting the output is the last resort.
+
+    ### Usage
+
+    ```python
+    from crawlee.crawlers import AiSkeletonDistiller
+
+    distiller = AiSkeletonDistiller(max_text_len=80)
+    skeleton = distiller.distill('<html>...</html>')
+    ```
+    """
+
+    def __init__(
+        self,
+        *,
+        cleaner: Cleaner | None = None,
+        max_text_len: int = 50,
+        max_json_len: int | None = 1_000,
+        keep_siblings: int = 3,
+        max_classes: int = 5,
+        max_attr_len: int = 100,
+        keep_head: bool = True,
+        max_size: int | None = 60_000,
+        pretty: bool = False,
+        prompt_notes: str | None = _SKELETON_PROMPT_NOTES,
+    ) -> None:
+        """Initialize a new instance.
+
+        Args:
+            cleaner: A custom `lxml_html_clean.Cleaner`.
+            max_text_len: Cap on a text node, in characters.
+            max_json_len: Cap on JSON payload length, or `None` to keep in full.
+            keep_siblings: How many leading siblings to keep when a repeated run is collapsed. Must be at least 1.
+            max_classes: How many class tokens to keep per element.
+            max_attr_len: Cap on attribute value length, in characters.
+            keep_head: Whether to keep a reduced `<head>`.
+            max_size: Hard cap on the skeleton, in characters. A tightening re-distillation runs first. If the result
+                is still too big, the tail is dropped and replaced with the truncation marker.
+            pretty: Whether to pretty-print the serialized HTML.
+            prompt_notes: Override for the default prompt notes. Pass `None` to send no notes to the LLM.
+        """
+        super().__init__(
+            cleaner=cleaner,
+            max_classes=max_classes,
+            max_attr_len=max_attr_len,
+            max_json_len=max_json_len,
+            keep_head=keep_head,
+            max_size=max_size,
+            pretty=pretty,
+            prompt_notes=prompt_notes,
+        )
+        self._max_text_len = max_text_len
+        self._keep_siblings = keep_siblings
+
+    @override
+    def _reduce(self, tree: HtmlElement) -> None:
+        """Apply base reduction, then collapse repeated sibling runs.
+
+        Args:
+            tree: The cleaned lxml tree.
+        """
+        super()._reduce(tree)
+        self._collapse_repeated_siblings(tree)
+
+    def _truncate_text(self, text: str | None) -> str | None:
+        """Collapse whitespace, then cap the text length.
+
+        Args:
+            text: The text to normalize, or `None`.
+        """
+        if not text:
+            return text
+
+        truncated_text = _WHITESPACE_RE.sub(' ', text)
+        if len(truncated_text) > self._max_text_len:
+            return truncated_text[: self._max_text_len].rstrip() + _TRUNCATION_MARKER
+
+        return truncated_text
+
+    @override
+    def _normalize_text(self, tree: HtmlElement) -> None:
+        """Collapse whitespace and truncate text to short samples.
+
+        Args:
+            tree: The lxml tree.
+        """
+        for elem in tree.iter():
+            if not isinstance(elem.tag, str):
+                continue
+
+            if not self._is_json_script(elem):
+                elem.text = self._truncate_text(elem.text)
+
+            elem.tail = self._truncate_text(elem.tail)
+
+    def _collapse_repeated_siblings(self, tree: HtmlElement) -> None:
+        """Collapse runs of equivalent siblings to the first few plus a marker.
+
+        Args:
+            tree: The lxml tree.
+        """
+
+        def signature(el: HtmlElement) -> tuple:
+            return (
+                el.tag,
+                tuple(sorted((el.get('class') or '').split())),
+                tuple(el.get(attr) for attr in _IDENTITY_ATTRS),
+            )
+
+        for parent in list(tree.iter()):
+            if not isinstance(parent.tag, str):
+                continue
+
+            children = [child for child in parent if isinstance(child.tag, str)]
+            for sig, group_iter in groupby(children, key=signature):
+                group = list(group_iter)
+                if len(group) <= self._keep_siblings or sig[0] == 'script' or sig[0] in _NEVER_COLLAPSE_TAGS:
+                    continue
+                saved, dropped = group[: self._keep_siblings], group[self._keep_siblings :]
+                for elem in dropped:
+                    parent.remove(elem)
+
+                classes = '.'.join(sig[1])
+                label = f'{sig[0]}.{classes}' if classes else sig[0]
+                marker = f' ...{len(dropped)} more <{label}> siblings omitted '
+                # lxml rejects `--` inside comment text, and BEM-style class names such as `card--wide` contain it.
+                while '--' in marker:
+                    marker = marker.replace('--', '- -')
+                saved[-1].addnext(etree.Comment(marker))
+
+    @override
+    def _enforce_max_size(self, distilled_html: str, html: str) -> str:
+        """Tighten the skeleton if it exceeds `max_size`, and cut as a last resort.
+
+        Args:
+            distilled_html: The skeleton output to size-check.
+            html: The original markup, re-distilled with tighter settings when the budget is breached.
+        """
+        if self._max_size is None or len(distilled_html) <= self._max_size:
+            return distilled_html
+
+        # The skeleton is too big, re-distill with tighter settings.
+        tighter = AiSkeletonDistiller(
+            cleaner=self._cleaner,
+            max_text_len=min(self._max_text_len, max(self._max_text_len // 2, 15)),  # never looser than now
+            max_json_len=self._max_json_len,
+            keep_siblings=1,
+            max_classes=self._max_classes,
+            max_attr_len=self._max_attr_len,
+            keep_head=self._keep_head,
+            max_size=None,  # prevent infinite recursion
+            pretty=self._pretty,
+        )
+        tighter_distilled_html = tighter.distill(html)
+
+        # Still too big, cut mid-stream and warn.
+        if len(tighter_distilled_html) > self._max_size:
+            logger.warning(
+                f'Skeleton exceeds max_size even after tightening ({len(tighter_distilled_html)} > {self._max_size}). '
+                'The tail of the page is cut off and invisible to the LLM. '
+                'Raise `max_size`, `scope` the extraction, or set `max_json_len`.'
+            )
+            return tighter_distilled_html[: self._max_size] + _TRUNCATION_MARKER
+
+        return tighter_distilled_html
diff --git a/src/crawlee/crawlers/_ai/_types.py b/src/crawlee/crawlers/_ai/_types.py
new file mode 100644
index 0000000000..30e08e8499
--- /dev/null
+++ b/src/crawlee/crawlers/_ai/_types.py
@@ -0,0 +1,132 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import TYPE_CHECKING, Protocol
+
+from pydantic import BaseModel
+from typing_extensions import TypeVar
+
+from crawlee._utils.docs import docs_group
+
+if TYPE_CHECKING:
+    from parsel import Selector
+    from pydantic_ai.usage import RunUsage
+
+
+TSchema = TypeVar('TSchema', bound=BaseModel)
+
+
+@docs_group('Functions')
+class AiHtmlDistiller(Protocol):
+    """Structural interface for HTML distillers.
+
+    A distiller reduces raw HTML to a compact representation that an LLM can read cheaply. The built-in
+    distillers are `AiCleanHtmlDistiller` and `AiSkeletonDistiller`.
+    """
+
+    def distill(self, html: str) -> str:
+        """Convert the raw `html` markup to a compact string representation suitable for an LLM."""
+
+    def get_prompt_notes(self) -> str | None:
+        """Return a short description of the produced representation, or `None` when there are no notes."""
+
+
+@docs_group('Other')
+@dataclass
+class AiUsageStats:
+    """Running totals of LLM requests and tokens, summed over extraction calls."""
+
+    requests: int = 0
+    input_tokens: int = 0
+    output_tokens: int = 0
+
+    @property
+    def total_tokens(self) -> int:
+        """Input and output tokens added together."""
+        consumed, produced = self.input_tokens, self.output_tokens
+        return consumed + produced
+
+    def add(self, usage: RunUsage) -> None:
+        """Fold the counters reported by one run into the totals."""
+        for counter in ('requests', 'input_tokens', 'output_tokens'):
+            setattr(self, counter, getattr(self, counter) + getattr(usage, counter))
+
+
+@docs_group('Other')
+class AiHtmlExtractor(Protocol):
+    """Structural interface for HTML extractors.
+
+    An extractor turns an HTML page into a validated Pydantic model using an LLM. The input format (cleaned HTML,
+    skeleton, Markdown, ...) is decided by the `AiHtmlDistiller` an implementation composes. The model and base
+    instructions are set at construction. Each `extract` call runs one extraction. The built-in extractors are
+    `AiDirectExtractor` and `AiSelectorExtractor`.
+    """
+
+    async def extract(
+        self,
+        content: str | Selector,
+        schema: type[TSchema],
+        *,
+        scope: str | None = None,
+        cache_tag: str | None = None,
+        additional_instructions: str | None = None,
+    ) -> TSchema:
+        """Extract a structured instance of `schema` from `content` and return it.
+
+        Args:
+            content: Raw HTML or a parsed Parsel `Selector`. A `Selector` is the fast path. The crawler passes its
+                live parsed tree directly and skips a re-parse. Treat it as read-only, since the user handler shares
+                it.
+            schema: The Pydantic model describing the desired output.
+            scope: Optional CSS selector. Extraction is restricted to the first matching subtree. A scope that matches
+                nothing raises an error.
+            cache_tag: Optional tag for caching implementations. Selectors are bucketed per tag, so one schema can
+                serve several page kinds without competing. The crawler usually passes `request.label`.
+                Implementations without caching ignore it.
+            additional_instructions: Extra instructions for this call only. They are appended to the base
+                instructions, not a replacement. Use them for page specifics (e.g. 'the price is the discounted one,
+                not the list price').
+        """
+
+    @property
+    def ai_usage(self) -> AiUsageStats:
+        """The `AiUsageStats` accumulator holding token usage across extraction calls."""
+
+    def set_ai_usage(self, value: AiUsageStats) -> None:
+        """Replace the usage accumulator with `value`.
+
+        Lets an external owner share one accumulator across a delegation chain. `AiSelectorExtractor` uses this to
+        fold its fallback's usage into one accumulator. Extractors with per-instance counters may make it a no-op.
+
+        Args:
+            value: The accumulator to adopt.
+        """
+
+
+@docs_group('Functions')
+class ExtractFunction(Protocol):
+    """Call signature of the `extract` helper exposed on `AiCrawlingContext`.
+
+    Binds the configured extractor to the current page, so a handler passes just the schema and the optional
+    per-call knobs.
+    """
+
+    async def __call__(
+        self,
+        schema: type[TSchema],
+        *,
+        scope: str | None = None,
+        cache_tag: str | None = None,
+        additional_instructions: str | None = None,
+    ) -> TSchema:
+        """Extract an instance of `schema` from the current page and return it.
+
+        Args:
+            schema: The Pydantic model describing the desired output.
+            scope: Optional CSS selector restricting extraction to the first matching subtree. Saves tokens and
+                prevents matches outside the region of interest.
+            cache_tag: Optional tag used by caching extractors to bucket cached selectors per page kind. Defaults to
+                `context.request.label`.
+            additional_instructions: Extra instructions appended to the base instructions for this call only (e.g.
+                'the price is the discounted one, not the list price'). Does not replace the base instructions.
+        """
diff --git a/src/crawlee/crawlers/_ai/_utils.py b/src/crawlee/crawlers/_ai/_utils.py
new file mode 100644
index 0000000000..8460599af7
--- /dev/null
+++ b/src/crawlee/crawlers/_ai/_utils.py
@@ -0,0 +1,28 @@
+from lxml_html_clean import Cleaner
+
+
+def get_basic_ai_cleaner(**kwargs: object) -> Cleaner:
+    """Create the `lxml_html_clean.Cleaner` that the built-in distillers use by default.
+
+    Args:
+        kwargs: `Cleaner` options that replace or extend the defaults below.
+    """
+    defaults: dict = {
+        'scripts': True,  # JSON scripts are protected separately by the distiller
+        'javascript': True,
+        'comments': True,
+        'style': True,
+        'inline_style': True,
+        'links': True,
+        'meta': False,  # meta tags carry structured data, handled by the distiller
+        'page_structure': False,
+        'embedded': True,
+        'frames': True,
+        'forms': False,
+        'annoying_tags': True,
+        'kill_tags': ('svg', 'noscript', 'template', 'canvas', 'video', 'audio', 'source'),
+        'remove_unknown_tags': False,  # required: the JSON-script protect tag must survive
+        'safe_attrs_only': False,  # the distiller filters attributes separately
+    }
+    # Caller-supplied options take precedence over the defaults.
+    return Cleaner(**{**defaults, **kwargs})
diff --git a/uv.lock b/uv.lock
index 9df341a67b..eb125df817 100644
--- a/uv.lock
+++ b/uv.lock
@@ -800,6 +800,11 @@ adaptive-crawler = [
     { name = "scikit-learn", version = "1.7.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
     { name = "scikit-learn", version = "1.8.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
 ]
+ai = [
+    { name = "lxml", extra = ["html-clean"] },
+    { name = "parsel" },
+    { name = "pydantic-ai-slim", extra = ["openai"] },
+]
 all = [
     { name = "aiomysql" },
     { name = "aiosqlite" },
@@ -814,6 +819,7 @@ all = [
     { name = "httpx", extra = ["brotli", "http2", "zstd"] },
     { name = "inquirer" },
     { name = "jaro-winkler" },
+    { name = "lxml", extra = ["html-clean"] },
     { name = "opentelemetry-api" },
     { name = "opentelemetry-distro", extra = ["otlp"] },
     { name = "opentelemetry-instrumentation" },
@@ -822,6 +828,7 @@ all = [
     { name = "opentelemetry-semantic-conventions" },
     { name = "parsel" },
     { name = "playwright" },
+    { name = "pydantic-ai-slim", extra = ["openai"] },
     { name = "redis", extra = ["hiredis"] },
     { name = "rich" },
     { name = "scikit-learn", version = "1.7.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
@@ -936,7 +943,7 @@ requires-dist = [
     { name = "cachetools", specifier = ">=5.5.0" },
     { name = "colorama", specifier = ">=0.4.0" },
     { name = "cookiecutter", marker = "extra == 'cli'", specifier = ">=2.6.0" },
-    { name = "crawlee", extras = ["adaptive-crawler", "beautifulsoup", "cli", "curl-impersonate", "httpx", "parsel", "playwright", "otel", "sql-sqlite", "sql-postgres", "sql-mysql", "stagehand", "redis"], marker = "extra == 'all'" },
+    { name = "crawlee", extras = ["adaptive-crawler", "ai", "beautifulsoup", "cli", "curl-impersonate", "httpx", "parsel", "playwright", "otel", "sql-sqlite", "sql-postgres", "sql-mysql", "stagehand", "redis"], marker = "extra == 'all'" },
     { name = "cryptography", marker = "extra == 'sql-mysql'", specifier = ">=46.0.5" },
     { name = "curl-cffi", marker = "extra == 'curl-impersonate'", specifier = ">=0.9.0" },
     { name = "html5lib", marker = "extra == 'beautifulsoup'", specifier = ">=1.0" },
@@ -944,6 +951,7 @@ requires-dist = [
     { name = "impit", specifier = ">=0.8.0" },
     { name = "inquirer", marker = "extra == 'cli'", specifier = ">=3.3.0" },
     { name = "jaro-winkler", marker = "extra == 'adaptive-crawler'", specifier = ">=2.0.3" },
+    { name = "lxml", extras = ["html-clean"], marker = "extra == 'ai'", specifier = ">=5.2.0" },
     { name = "more-itertools", specifier = ">=10.2.0" },
     { name = "opentelemetry-api", marker = "extra == 'otel'", specifier = ">=1.34.1" },
     { name = "opentelemetry-distro", extras = ["otlp"], marker = "extra == 'otel'", specifier = ">=0.54" },
@@ -951,6 +959,7 @@ requires-dist = [
     { name = "opentelemetry-instrumentation-httpx", marker = "extra == 'otel'", specifier = ">=0.54" },
     { name = "opentelemetry-sdk", marker = "extra == 'otel'", specifier = ">=1.34.1" },
     { name = "opentelemetry-semantic-conventions", marker = "extra == 'otel'", specifier = ">=0.54" },
+    { name = "parsel", marker = "extra == 'ai'", specifier = ">=1.10.0" },
     { name = "parsel", marker = "extra == 'parsel'", specifier = ">=1.10.0" },
     { name = "playwright", marker = "extra == 'adaptive-crawler'", specifier = ">=1.27.0" },
     { name = "playwright", marker = "extra == 'playwright'", specifier = ">=1.27.0" },
@@ -958,6 +967,7 @@ requires-dist = [
     { name = "protego", specifier = ">=0.5.0" },
     { name = "psutil", specifier = ">=6.0.0" },
     { name = "pydantic", specifier = ">=2.11.0" },
+    { name = "pydantic-ai-slim", extras = ["openai"], marker = "extra == 'ai'", specifier = ">=1.106.0" },
     { name = "pydantic-settings", specifier = ">=2.12.0" },
     { name = "pyee", specifier = ">=9.0.0" },
     { name = "redis", extras = ["hiredis"], marker = "extra == 'redis'", specifier = ">=7.0.0" },
@@ -973,7 +983,7 @@ requires-dist = [
     { name = "wrapt", marker = "extra == 'otel'", specifier = ">=1.17.0" },
     { name = "yarl", specifier = ">=1.18.0" },
 ]
-provides-extras = ["all", "adaptive-crawler", "beautifulsoup", "cli", "curl-impersonate", "httpx", "parsel", "playwright", "otel", "sql-postgres", "stagehand", "sql-sqlite", "sql-mysql", "redis"]
+provides-extras = ["all", "adaptive-crawler", "beautifulsoup", "cli", "curl-impersonate", "httpx", "ai", "parsel", "playwright", "otel", "sql-postgres", "stagehand", "sql-sqlite", "sql-mysql", "redis"]
 
 [package.metadata.requires-dev]
 dev = [
@@ -1290,6 +1300,19 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/81/47/dd9a212ef6e343a6857485ffe25bba537304f1913bdbed446a23f7f592e1/filelock-3.29.0-py3-none-any.whl", hash = "sha256:96f5f6344709aa1572bbf631c640e4ebeeb519e08da902c39a001882f30ac258", size = 39812, upload-time = "2026-04-19T15:39:08.752Z" },
 ]
 
+[[package]]
+name = "genai-prices"
+version = "0.0.65"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "httpx2" },
+    { name = "pydantic" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/e4/1e/62a8d65d263f0315a9dbd948110ad71c70ea16b58297c8c2121917848766/genai_prices-0.0.65.tar.gz", hash = "sha256:9cfe2ad2b4fdd68a8c9d2d189392e0d5bdd17048abd4105eb7c740c61392090f", size = 68908, upload-time = "2026-06-06T19:39:28.269Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/d8/9b/727c0b640d72c857bfb2b30b4ae2b16f612d81c5c2f09862a52b4697cbce/genai_prices-0.0.65-py3-none-any.whl", hash = "sha256:6f9239a21ce13c9fb329825dc7e06520bc582227b83b51fe82ab71f2dd3850da", size = 71549, upload-time = "2026-06-06T19:39:29.219Z" },
+]
+
 [[package]]
 name = "googleapis-common-protos"
 version = "1.75.0"
@@ -1368,6 +1391,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/15/32/77ee8a6c1564fc345a491a4e85b3bf360e4cf26eac98c4532d2fdb96e01f/greenlet-3.5.0-cp314-cp314t-win_amd64.whl", hash = "sha256:d60097128cb0a1cab9ea541186ea13cd7b847b8449a7787c2e2350da0cb82d86", size = 245324, upload-time = "2026-04-27T12:24:40.295Z" },
 ]
 
+[[package]]
+name = "griffelib"
+version = "2.0.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/9d/82/74f4a3310cdabfbb10da554c3a672847f1ed33c6f61dd472681ce7f1fe67/griffelib-2.0.2.tar.gz", hash = "sha256:3cf20b3bc470e83763ffbf236e0076b1211bac1bc67de13daf494640f2de707e", size = 166461, upload-time = "2026-03-27T11:34:51.091Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/11/8c/c9138d881c79aa0ea9ed83cbd58d5ca75624378b38cee225dcf5c42cc91f/griffelib-2.0.2-py3-none-any.whl", hash = "sha256:925c857658fb1ba40c0772c37acbc2ab650bd794d9c1b9726922e36ea4117ea1", size = 142357, upload-time = "2026-03-27T11:34:46.275Z" },
+]
+
 [[package]]
 name = "grpcio"
 version = "1.80.0"
@@ -1572,6 +1604,19 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" },
 ]
 
+[[package]]
+name = "httpcore2"
+version = "2.3.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "h11" },
+    { name = "truststore" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/e6/34/18f1c596e677962f040284246f393b10a1f8ce440b3a7e69c637d0f1c7ad/httpcore2-2.3.0.tar.gz", hash = "sha256:07327e251560960eea8e969d92d4c6a325feb13cca39e25340731336c3baf924", size = 64300, upload-time = "2026-06-01T13:15:02.998Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/c2/dd/3357218c69360d1cecc196c230c9a1d5c9afd5dba362056e23e60a5e64e5/httpcore2-2.3.0-py3-none-any.whl", hash = "sha256:477e9e334f74e5240dcac002e890580f36a57d40ff0fb14cc9655731d23b8415", size = 80024, upload-time = "2026-06-01T13:15:00.001Z" },
+]
+
 [[package]]
 name = "httptools"
 version = "0.7.1"
@@ -1642,6 +1687,21 @@ zstd = [
     { name = "zstandard" },
 ]
 
+[[package]]
+name = "httpx2"
+version = "2.3.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "anyio" },
+    { name = "httpcore2" },
+    { name = "idna" },
+    { name = "truststore" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/9f/9a/cca0b9145f13d8ae34b885ae28d403a1469a433abc78e0f94f4ce94e650b/httpx2-2.3.0.tar.gz", hash = "sha256:227e7c41d95a76d4077a52640564132777215fc3394e07b66a3116c33d668fa9", size = 81115, upload-time = "2026-06-01T13:15:04.324Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/87/ce/ae2911859847f9ba1d6b23027e53481cbeb50b93234f355a968d300ca2cb/httpx2-2.3.0-py3-none-any.whl", hash = "sha256:6f393663bdf6dbe7fe90118e3eb5b2bd024a675cae0390ac08cec9198812d8b7", size = 74538, upload-time = "2026-06-01T13:15:01.566Z" },
+]
+
 [[package]]
 name = "hyperframe"
 version = "6.1.0"
@@ -1791,6 +1851,109 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/b3/00/b61668fd3b1e43b445979ec9a9e0af4781bf06884937d1e906f6a1be6dff/jinxed-2.0.0-py2.py3-none-any.whl", hash = "sha256:b3df1be5262a37145ef42875a8bbf918f1a563fbd035359650dd9fc0bb2b9294", size = 95364, upload-time = "2026-05-08T21:25:24.536Z" },
 ]
 
+[[package]]
+name = "jiter"
+version = "0.15.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/66/b5/55f06bb281d92fb3cc86d14e1def2bd908bb77693183e7cb1f5a3c388b0c/jiter-0.15.0.tar.gz", hash = "sha256:4251acc80e2b7c9b7b8823456ea0fceeb0734dac2df7636d3c711b38476b5a76", size = 166640, upload-time = "2026-05-19T10:09:48.361Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/1d/da/76a2c7e510ba15fe323d9509c223ab272da79ea59f54488f4a78da6426db/jiter-0.15.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:edebcf7d1f601199084bb6e844d7dc67e03e04f6ac786b0332d616635c4ff7a4", size = 310849, upload-time = "2026-05-19T10:06:51.944Z" },
+    { url = "https://files.pythonhosted.org/packages/5d/8e/827be942883a4dc0862c48626ff41af3320b1902d136a0bf4b9041f2c567/jiter-0.15.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9f924585cdacf631cd382b657966847bb537bf9ed0a6f9b991da5f05a631480f", size = 314991, upload-time = "2026-05-19T10:06:53.522Z" },
+    { url = "https://files.pythonhosted.org/packages/6d/38/be2832be361ba1b9517c76f46d30b64e985be1dd43c974f4c3a4b1844436/jiter-0.15.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:abbf258599526ad0326fe51e252e24f2bd6f24f1852681b4b78feda3808f1d18", size = 340843, upload-time = "2026-05-19T10:06:55.071Z" },
+    { url = "https://files.pythonhosted.org/packages/6d/d8/90f01fb83c0c7ba509303ec93e32a308fbfa167d264860b01c0fd0dbbd06/jiter-0.15.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7c468136b8bd6bb18c8786e4236a1fa27362f24cb23450ba0cb204ab379b8e6f", size = 365116, upload-time = "2026-05-19T10:06:56.893Z" },
+    { url = "https://files.pythonhosted.org/packages/91/38/94593d34f8c67a0b6f6cbc027f016ffa9780b3a858a7a86f6fd7a15bcc1e/jiter-0.15.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:05906b93d72f03339e6bb7cf8dc10ebda64a0266126eed6beba79e20abcf5fd4", size = 457970, upload-time = "2026-05-19T10:06:58.707Z" },
+    { url = "https://files.pythonhosted.org/packages/df/04/d79962dd49d00c97e2a9b4cacea1947904d02135936960351f9a96d4c1a6/jiter-0.15.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:30ce785d2adb8e32c3f7741442370a74834ec4c01f3c48f0750227a0b4ef27d6", size = 375744, upload-time = "2026-05-19T10:07:00.471Z" },
+    { url = "https://files.pythonhosted.org/packages/c3/2e/5d37abe2be0e819c21e2338bebd410e481763ce526a9138c8c3652fa0123/jiter-0.15.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2fd73e3da91a0a722d67165e849ce2cdc10de0e0d48738c142be8c6c5f310f4c", size = 349609, upload-time = "2026-05-19T10:07:01.829Z" },
+    { url = "https://files.pythonhosted.org/packages/7a/90/98768ad2ed90c1fda15d64157de2dfbf73c1c074d4b1bfaca915480bc7cf/jiter-0.15.0-cp310-cp310-manylinux_2_31_riscv64.whl", hash = "sha256:ceb8fc27d38793f9c97149be8302720c5b22e5c195a37bf2c45dc36c4600a512", size = 354366, upload-time = "2026-05-19T10:07:03.587Z" },
+    { url = "https://files.pythonhosted.org/packages/d6/c4/fbfb806209f1fe4b7dccdfb07bc62bb044300734a945b06fd64db446ef6a/jiter-0.15.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d726e3ceeb337191324b49de298142f27c3ad10886341555d1d5315b5f252c6a", size = 393519, upload-time = "2026-05-19T10:07:05.08Z" },
+    { url = "https://files.pythonhosted.org/packages/37/1c/b9c257cd70cb453b6d10f3ebf0402cdb11669ab455389096f09839670290/jiter-0.15.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:2c8aea7781d2a372227871de4e1a1332aa96f5a89fd76c5e835dafdbad102887", size = 519952, upload-time = "2026-05-19T10:07:06.589Z" },
+    { url = "https://files.pythonhosted.org/packages/a9/1a/aa85027db7ab15829c12feebbc33b404f53fc399bd559d85fd0d6365ff0d/jiter-0.15.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:cf4bd113a69c0a740e27cb962ce10630c36d2b8f59d759a651b955ee9d18a823", size = 550770, upload-time = "2026-05-19T10:07:08.228Z" },
+    { url = "https://files.pythonhosted.org/packages/d4/54/8c3f65c8a5687925e84708f19d63f7f37d28e2b86a48d951702ad94424d8/jiter-0.15.0-cp310-cp310-win32.whl", hash = "sha256:d92a5cd21fdb083931d546c207aa29633787c5dc5b02daab2d32b843f88a2c53", size = 209303, upload-time = "2026-05-19T10:07:10.006Z" },
+    { url = "https://files.pythonhosted.org/packages/d5/72/0528a1eb9f42dd2d8228a0711458628f35924d131f623eaebc35fd23d3d4/jiter-0.15.0-cp310-cp310-win_amd64.whl", hash = "sha256:e58585a58209d72691ce2d62a9147445f5a87beb0bde97fde284c96ae392a3d1", size = 200404, upload-time = "2026-05-19T10:07:11.426Z" },
+    { url = "https://files.pythonhosted.org/packages/e4/13/daa722f5765c393576f466378f9dfd29d77c9bed939e0688f96afa3601ea/jiter-0.15.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:0f862193b8696249d22ec433e85fd2ab0ad9596bc3e45e6c0bc55e8aeba97be2", size = 310899, upload-time = "2026-05-19T10:07:12.89Z" },
+    { url = "https://files.pythonhosted.org/packages/7f/82/2d2551829b082f4b6d82b9f939b031fb808a10aab1ec0664f82e150bb9a2/jiter-0.15.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1303d4d68a9b051ea90502402063ecf3807da00ad2affa19ca1ae3b90b3c5f67", size = 314963, upload-time = "2026-05-19T10:07:14.539Z" },
+    { url = "https://files.pythonhosted.org/packages/2a/0a/8b1a51466f7fe9f31dbe4bc7e0ca848674f9825e0f737b929b97e8c60aa7/jiter-0.15.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:392b8ab019e5502d08aff85c6272209c24bc2cbe706ea82a56368f524236614a", size = 341730, upload-time = "2026-05-19T10:07:15.869Z" },
+    { url = "https://files.pythonhosted.org/packages/f6/2a/e71dea19822e2e404e83992a08c1d6b9b617bb944f28c9c2fbd85d02c91e/jiter-0.15.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:773b6eb282ce11ee19f05f6b2d4404fa308e5bbd353b0b80a0262caad6db2cd7", size = 366214, upload-time = "2026-05-19T10:07:17.259Z" },
+    { url = "https://files.pythonhosted.org/packages/c4/59/97e1fa539d124a509a00ab7f669289d1c1d236ecabf12948a18f16c91082/jiter-0.15.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8d2c0c44d569ce0f2850f5c926f8caeb5f245fbc84475aeb36efccc2103e6dbd", size = 459527, upload-time = "2026-05-19T10:07:18.741Z" },
+    { url = "https://files.pythonhosted.org/packages/d1/7a/4a68d331aef8cf2e2393c14a3aacb635c62aa86071b0229899fb5baaa907/jiter-0.15.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:032396229564bca02440396bd327710719f724f5e7b7e9f7a8eb3faa4a2c2281", size = 375451, upload-time = "2026-05-19T10:07:20.208Z" },
+    { url = "https://files.pythonhosted.org/packages/7b/7e/1c445c2b6f0e30a274dc8082e0c3c7825411cce80d726bccd697c98cc8d3/jiter-0.15.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3d37768fce7f88dd2a8c6091f2325dea27d30d30d5c6e7a1c0f0af77723b708", size = 349428, upload-time = "2026-05-19T10:07:22.372Z" },
+    { url = "https://files.pythonhosted.org/packages/00/94/e20d38984fc17a636371bffd2ae0f698124fdc8e75ef969cd2da6ba7cea7/jiter-0.15.0-cp311-cp311-manylinux_2_31_riscv64.whl", hash = "sha256:2c9cb907439d20bd0c7d7565ca01ee52234203208433749bae5b516907526928", size = 355405, upload-time = "2026-05-19T10:07:23.916Z" },
+    { url = "https://files.pythonhosted.org/packages/94/fa/4d09f814779d0ea80a28ed8e4c6662ec9a4a8ecef0ac52190ebac6262d14/jiter-0.15.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9100ddbec09741cc66feb0fc6773f8bdbd0e3c345689368f260082ff85dcc0cd", size = 393688, upload-time = "2026-05-19T10:07:25.854Z" },
+    { url = "https://files.pythonhosted.org/packages/54/9d/8eb5d4fb8bf7e93a75964a5da71a75c67c864baf7fa3f98598187b3c7e57/jiter-0.15.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ae1b0d82ac2d987f9ea512b1c9adfcc71a28de3dea3a6039b54d76cffda9901e", size = 520853, upload-time = "2026-05-19T10:07:27.303Z" },
+    { url = "https://files.pythonhosted.org/packages/e7/2c/5e07874e59e623a943a0acf1552a80d05b70f31b402287a8fc6d7ec634c7/jiter-0.15.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:8020c99ec13a7db2b6f96cbe82ef4721c88b426a4892f27478044af0284615ef", size = 551016, upload-time = "2026-05-19T10:07:28.846Z" },
+    { url = "https://files.pythonhosted.org/packages/22/ed/d2d34422143474cadc15b60d482b1c35683dbc5c63c24346ddd0df09bcaf/jiter-0.15.0-cp311-cp311-win32.whl", hash = "sha256:42bfb257930800cf43e7c62c832402c704ab60797c992faf88d20e903eac8f32", size = 209518, upload-time = "2026-05-19T10:07:30.431Z" },
+    { url = "https://files.pythonhosted.org/packages/1d/7d/52778b930e5cc3e52a37d950b1c10494244308b4329b25a0ff0d88303a81/jiter-0.15.0-cp311-cp311-win_amd64.whl", hash = "sha256:860a74063284a2ae9bfedd694f299cc2c68e2696c5f3d440cc9d18bb81b9dd04", size = 200565, upload-time = "2026-05-19T10:07:32.125Z" },
+    { url = "https://files.pythonhosted.org/packages/3b/4f/d9b4067feb69b3fa6eb0488e1b59e2ad5b463fe39f59e527eab2aca00bb0/jiter-0.15.0-cp311-cp311-win_arm64.whl", hash = "sha256:37a10c377ce3a4a85f4a67f28b7afe093154cde77eaf248a72e856aa08b4d865", size = 195488, upload-time = "2026-05-19T10:07:33.846Z" },
+    { url = "https://files.pythonhosted.org/packages/44/53/4f6bddbcde3c71e56d0aa1337ec95950f3d27dd4153e25aadf0feac71751/jiter-0.15.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:0e90a1c315a0226ec822d973817967f9223b7701546c8c2a7913e7ab0926294d", size = 308793, upload-time = "2026-05-19T10:07:35.25Z" },
+    { url = "https://files.pythonhosted.org/packages/01/84/c01099b59a285a1ebba64ae93f62bfa036675340fd1b0045ae65890a0442/jiter-0.15.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8c9004af7c8d67cce7f1aae1026fb55607f4aa600710d08ede3a3ce4aeefe7e0", size = 309570, upload-time = "2026-05-19T10:07:36.919Z" },
+    { url = "https://files.pythonhosted.org/packages/58/64/8fb7f9d45bb98190355454cd04dad8d8f27223d6bd52f83af07f637168a6/jiter-0.15.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c210f8b35dc6f30aafd4b4365ca89b9d1189f21ab49b8e68fa6322a847aef138", size = 336783, upload-time = "2026-05-19T10:07:38.694Z" },
+    { url = "https://files.pythonhosted.org/packages/c3/b6/f5739011d009b3a30f6a53c5240979030ba29ae46a8c67e3a15759f7c37d/jiter-0.15.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5f30bae8bc1c2d613e28e5af3e8cceb09b742f1c8a8a5f839fb67afaffc03b61", size = 363555, upload-time = "2026-05-19T10:07:40.832Z" },
+    { url = "https://files.pythonhosted.org/packages/e5/12/98a9d9f766665e8a3b6252454e17cb0c464606a28cf2fa09399b003345fa/jiter-0.15.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c60e71b6d10cfc284c9bf36bd885e8d44c46f688ce50aa91b5edd90181dea687", size = 452255, upload-time = "2026-05-19T10:07:42.62Z" },
+    { url = "https://files.pythonhosted.org/packages/e8/d5/60f972840f79c5e7544fce567c56f1e4e50468f996baba3e78d823dd62a6/jiter-0.15.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0ab068bce62a45aa3e7367eceaffb5dde60b7eb853be8dece45132e3d0ff4879", size = 373559, upload-time = "2026-05-19T10:07:44.201Z" },
+    { url = "https://files.pythonhosted.org/packages/ee/cf/d46ef1234ba335aabc2f013210db8e0821a22f5e644a2e9449df199ecc23/jiter-0.15.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa248c9eb220197d363f688818dac2fd4b2f0cd7d843ca7105d652034823427d", size = 346055, upload-time = "2026-05-19T10:07:46.005Z" },
+    { url = "https://files.pythonhosted.org/packages/f0/63/4d2749d8d54d230bad9b3a6b0d00cc28c6ff6b2fdffc26a8ccf76cc5a974/jiter-0.15.0-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:2a77aadd57cac1682e4401a72724d2796d89a4ba129b1a5812aa94ee480826eb", size = 351406, upload-time = "2026-05-19T10:07:47.855Z" },
+    { url = "https://files.pythonhosted.org/packages/d9/b9/9965b990035d8773328e0a8c8b457a87bf2b19f6c4126d9d99296be5d16a/jiter-0.15.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2ae901f3a55bfafdde31d289590fa25e3245735a2b1e8c7cc15871710a002871", size = 389357, upload-time = "2026-05-19T10:07:49.665Z" },
+    { url = "https://files.pythonhosted.org/packages/2d/55/9ddf903deda1413e87fed792f416b7123daee5b8efbad6a202a7421c36a5/jiter-0.15.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:f0b271b462769543716f92d3a4f90527df6ef5ed05ee95ec4137f513e21e1b77", size = 517263, upload-time = "2026-05-19T10:07:51.537Z" },
+    { url = "https://files.pythonhosted.org/packages/e8/76/a0c40ad064d3a20a4fde231e35d56e9a01ce82164278180e82d5daf85469/jiter-0.15.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:2fb6a5d26af81fc0f00f9360a891e05cf755e149bba391c4d563adc54812973d", size = 548646, upload-time = "2026-05-19T10:07:53.196Z" },
+    { url = "https://files.pythonhosted.org/packages/23/4f/eca9b954942916ba2f453891b8593ab444cd872396fe66a3936616f236f3/jiter-0.15.0-cp312-cp312-win32.whl", hash = "sha256:c2f6bb8b5216ab9e7873bc08b5d7bef2b8abbb578a3069bf1cd14a45d71d771d", size = 206427, upload-time = "2026-05-19T10:07:55.307Z" },
+    { url = "https://files.pythonhosted.org/packages/95/bf/8ead82a87495149542748e828d153fd232a512a22c83b02c4815c1a9c7d8/jiter-0.15.0-cp312-cp312-win_amd64.whl", hash = "sha256:40b2c7e92c44a84d748d21706c68dc6ff8161d80b59c99d774721a0d2317d7c7", size = 197300, upload-time = "2026-05-19T10:07:56.651Z" },
+    { url = "https://files.pythonhosted.org/packages/f4/e4/9b8a78fb2d894471bc344e37f1949bdd784bd914d031dba0ba3a40c71dd7/jiter-0.15.0-cp312-cp312-win_arm64.whl", hash = "sha256:cc0bc345cf2df9d1c00ac443f50d543c1ccfa8b0422cb85b1ab70d681c0b255b", size = 192702, upload-time = "2026-05-19T10:07:58.307Z" },
+    { url = "https://files.pythonhosted.org/packages/e5/f4/f708c900ecee41b2025ef8413d5351e5649eb2125c506f6720cc69b06f5c/jiter-0.15.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:1c11465f97e2abf45a014b83b730222f8f1c5335e802c7055a67d50de6f1f4e3", size = 307829, upload-time = "2026-05-19T10:07:59.704Z" },
+    { url = "https://files.pythonhosted.org/packages/86/59/db537c0949e83668c38481d426b9f2fd5ab758c4ee53a811dd0a510626a0/jiter-0.15.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d1e7b1776f0797956c509e123d0952d10d293a9492dea9f288ab9570ec01d1a5", size = 308445, upload-time = "2026-05-19T10:08:01.184Z" },
+    { url = "https://files.pythonhosted.org/packages/37/38/ea0e13b18c30ef951da0d47d39e7fa9edb82a93a62990ffbd7cea9b622d4/jiter-0.15.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:351a341c2105aa430b7047e30f1bf7975f6313b00165d3fc07be2edaf741f279", size = 336181, upload-time = "2026-05-19T10:08:02.688Z" },
+    { url = "https://files.pythonhosted.org/packages/58/fc/2303901b16c4ba05865588990a420c0b4156270b44379c20931544a1d962/jiter-0.15.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4ab395feec8d249ec4044e228e98a7033f043426a265df439dc3698823f0a4e4", size = 362985, upload-time = "2026-05-19T10:08:04.394Z" },
+    { url = "https://files.pythonhosted.org/packages/5b/6f/11bace093c52e7d4d26c8e606ccd7ae8c972189622469ec0d9e28161e28b/jiter-0.15.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a2a438005b6f22d0273413484d6094d7c2c5d10ec1b3a3bf128e0d1d3ba53258", size = 453292, upload-time = "2026-05-19T10:08:05.967Z" },
+    { url = "https://files.pythonhosted.org/packages/22/db/987f2f086ca4d7a6582eb4ccd513f9b26b42d9e4243a087609a3137a8fc7/jiter-0.15.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f18f85e4218d1b40f000f42a92239a7a61a902cd42c65e6c360dbd17dcb20894", size = 373501, upload-time = "2026-05-19T10:08:07.857Z" },
+    { url = "https://files.pythonhosted.org/packages/8f/7c/89fbcabb2739b7a5b8dc959a1b6c5761f6484f5fed3486854b3c789bb1de/jiter-0.15.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d1aa62e277fc1cbd80e6deacae6f4d983b41b3d7728e0645c5d741a6149bba45", size = 344683, upload-time = "2026-05-19T10:08:09.431Z" },
+    { url = "https://files.pythonhosted.org/packages/30/6f/6cca7692e7dddfec6d8d76c54dc97f2af2a41df4ac0674b999df1f09a5f3/jiter-0.15.0-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:6550fa135c7deb8ead6af49ed7ff648532ea8334a1447fe34a36315ef79c5c29", size = 350892, upload-time = "2026-05-19T10:08:11.352Z" },
+    { url = "https://files.pythonhosted.org/packages/39/14/0338d6190cb8e6d22e677ab1d4eabd4117f67cca70c54cd04b82ff64e068/jiter-0.15.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:066f8f33f18b2419cd8213b2436fa7fbc9c499f315971cfa3ce1f9820c001b1b", size = 388723, upload-time = "2026-05-19T10:08:12.912Z" },
+    { url = "https://files.pythonhosted.org/packages/90/31/cc19f4a1bdb6afb09ce6a2f2615aa8d44d994eba0d8e6105ed1af920e736/jiter-0.15.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:75e8a04e91432dde9f1838373cf93d23726c79d3e908d319acf0e796f85592e7", size = 516648, upload-time = "2026-05-19T10:08:14.808Z" },
+    { url = "https://files.pythonhosted.org/packages/49/9f/833c541512cd091b63c10c0381973dfe11bc7a503a818c16384417e0c81e/jiter-0.15.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:a97261f1fccb8e50ecd2890a96e46efdc3f57c80a197324c6777827231eca712", size = 547382, upload-time = "2026-05-19T10:08:16.927Z" },
+    { url = "https://files.pythonhosted.org/packages/d2/11/e7b70e91f90bc4477e8eee9e8a5f7cf3cb41b4525d6394dc98a714eb8f7f/jiter-0.15.0-cp313-cp313-win32.whl", hash = "sha256:c77496cb10bd7549690fbbab3e5ec05857b83e49276f4a9423a766ddd2afcd4c", size = 205845, upload-time = "2026-05-19T10:08:18.401Z" },
+    { url = "https://files.pythonhosted.org/packages/4b/23/5c20d9ad6f02c493e4023e5d2d09e1c1f15fe2753c9102c544aff068a88e/jiter-0.15.0-cp313-cp313-win_amd64.whl", hash = "sha256:b15741f501469009ae0ae90b7147958a664a7dede40aa7ff174a8a4645f546d0", size = 196842, upload-time = "2026-05-19T10:08:20.131Z" },
+    { url = "https://files.pythonhosted.org/packages/6b/11/1eb400ef248e8c925fd883fbe325daf5e42cd1b0d308539dd332bd4f7ffc/jiter-0.15.0-cp313-cp313-win_arm64.whl", hash = "sha256:5d6a60072b44c3c2b797a7ddcbcbbf2b34ea3cfd4721580fbfd2a09d9d9b84ba", size = 192212, upload-time = "2026-05-19T10:08:21.807Z" },
+    { url = "https://files.pythonhosted.org/packages/8a/60/2fd8d7c79da8acf9b7b277c7616847773779356b92acfc9bb158452174da/jiter-0.15.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:ef1fd24d9413f6209e00d3d5a453e67acfe004a25cc6c8e8484faed4311ab9e8", size = 315065, upload-time = "2026-05-19T10:08:23.218Z" },
+    { url = "https://files.pythonhosted.org/packages/46/f4/008fb7d65e8ac2abf00811651a661e025c4ba80bbc6f378450384ddd3aed/jiter-0.15.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:144f8e72cb53dab146347b91cceac01f5481237f2b93b4a339a1ee8f8878b67c", size = 339444, upload-time = "2026-05-19T10:08:24.701Z" },
+    { url = "https://files.pythonhosted.org/packages/00/55/90b0c7b9c6896c0f2a591dd36d36b71d22e09674bfef178fa03ba3f81499/jiter-0.15.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:553fcac2ef2cb990877f9fc0833b8b629a3e6a5670b6b5fd58219b41a653ddc4", size = 347779, upload-time = "2026-05-19T10:08:26.408Z" },
+    { url = "https://files.pythonhosted.org/packages/51/6b/69666cec5000fd57734c118437394516c749ae8dbeea9fb66d6fef9c4775/jiter-0.15.0-cp313-cp313t-win_amd64.whl", hash = "sha256:774f93f65031856bf14ad9f59bdcab8b8cad501e5ceabd51ba3525f76937a25b", size = 200395, upload-time = "2026-05-19T10:08:28.055Z" },
+    { url = "https://files.pythonhosted.org/packages/39/04/a6aa62cd27e8149b0d28df5561f10f6cceaf7935a9ccf3f1c5a05f9a0cd8/jiter-0.15.0-cp313-cp313t-win_arm64.whl", hash = "sha256:f1e1754960f38ec40613a07e5e372df67acb3b890fb383b6fb3de3e49ddbf3c7", size = 190516, upload-time = "2026-05-19T10:08:29.35Z" },
+    { url = "https://files.pythonhosted.org/packages/eb/d2/079f350ebf7859d081de30aa890f9e3be68516f754f3ba32366ffff4dcee/jiter-0.15.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:ac0d9ddea4350974be7a221fc25895f251a8fee748c889bdced2141c0fec1a49", size = 308884, upload-time = "2026-05-19T10:08:31.667Z" },
+    { url = "https://files.pythonhosted.org/packages/04/4e/a2c30a7f69b48c03b20935d647479106fe932f6e63f75faf53937197e05d/jiter-0.15.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:01a8222cf05ab1128e239421156c207949808acaaea2bdfd33130ae666786e86", size = 310028, upload-time = "2026-05-19T10:08:33.304Z" },
+    { url = "https://files.pythonhosted.org/packages/40/90/2e7cdfd3cf8ca967be38c48f5cf474d79f089efaf559a40f15984a77ae69/jiter-0.15.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:182226cbc930c9fab81bc2e41a4da672f89539906dadb05e75670ac07b94f71f", size = 337485, upload-time = "2026-05-19T10:08:35.259Z" },
+    { url = "https://files.pythonhosted.org/packages/9b/11/15a1aa28b120b8ee5b4f1fb894c125046225f09847738bd64233d3b84883/jiter-0.15.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:71683c38c825452999b5717fcae07ea708e8c93003e808be4319c1b02e3d176e", size = 364223, upload-time = "2026-05-19T10:08:36.694Z" },
+    { url = "https://files.pythonhosted.org/packages/b7/25/f442e8af5f3d0dcf47b39e83a0efd9ee45ea946aa6d04625dc3181eae3b6/jiter-0.15.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:30f2218e6a9e5c18bc10fe6d41ac189c442c88eacf11bad9f28ef95a9bef00e6", size = 456387, upload-time = "2026-05-19T10:08:38.143Z" },
+    { url = "https://files.pythonhosted.org/packages/da/f4/37f2d2c9f64f49af7da652ed7532bb5a2372e588e6927c3fdd76f911db65/jiter-0.15.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5157de9f76eb4bc5ea74a1219366a25f945ad305641d74e04f59c54087091aa9", size = 374461, upload-time = "2026-05-19T10:08:39.869Z" },
+    { url = "https://files.pythonhosted.org/packages/60/28/edcfbbbf0cb15436f36664a8908a0df47ab9006298d4cd937dc08ea932d6/jiter-0.15.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90c5db5527c221249a876160663ab891ace358c17f7b9c93ec1478b7f0550e5c", size = 345924, upload-time = "2026-05-19T10:08:41.668Z" },
+    { url = "https://files.pythonhosted.org/packages/47/13/89fba6398dab7f202b7278c4b4aac122399d2c0183971c4a57a3b7088df5/jiter-0.15.0-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:3e4540b8e74e4268811ac05db226a6a128ff572e7e0ce3f1163b693cadb184cd", size = 352283, upload-time = "2026-05-19T10:08:43.091Z" },
+    { url = "https://files.pythonhosted.org/packages/1b/da/0f6af8cef2c565a1ab44d970f268c43ccaa72707386ea6388e6fe2b6cd26/jiter-0.15.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:62ebd14e47e9aed9df4472afcb2663668ce4d74891cd54f86bf6e44029d6dc89", size = 389985, upload-time = "2026-05-19T10:08:44.915Z" },
+    { url = "https://files.pythonhosted.org/packages/a1/ec/b9cb7d6d29e24ee14910266157d2a279d7a8f60ee0df7fa840882976ba64/jiter-0.15.0-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:0be6f5ad41a809f303f416d17cec92a7a725902fb9b4f3de3d19362ac0ef8554", size = 517695, upload-time = "2026-05-19T10:08:46.486Z" },
+    { url = "https://files.pythonhosted.org/packages/64/5e/6d1bda880723aae0ad86b4b763f044362448efe31e3e819635d41cb03451/jiter-0.15.0-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:813dfbb17d65328bf86e5f0905dd277ba2265d3ca20556e86c0c7035b7182e5a", size = 548868, upload-time = "2026-05-19T10:08:48.026Z" },
+    { url = "https://files.pythonhosted.org/packages/0c/72/7de501cf38dcacaf35098796f3a50e0f2e338baba18a58946c618544b809/jiter-0.15.0-cp314-cp314-win32.whl", hash = "sha256:50e51156192722a9c58db112837d3f8ef96fb3c5ecc14e95f409134b08b158ec", size = 206380, upload-time = "2026-05-19T10:08:49.738Z" },
+    { url = "https://files.pythonhosted.org/packages/1e/a9/e19addf4b0c1bdce52c6da12351e6bc42c340c45e7c09e2158e46d293ccc/jiter-0.15.0-cp314-cp314-win_amd64.whl", hash = "sha256:30ce1a5d16b5641dc935d50ef775af6a0871e3d14ab05d6fc54dff371b78e558", size = 197687, upload-time = "2026-05-19T10:08:51.088Z" },
+    { url = "https://files.pythonhosted.org/packages/f2/c9/776b1db01db25fc6c1d58d1979a37b0a9fe787e5f5b1d062d2eaacb77923/jiter-0.15.0-cp314-cp314-win_arm64.whl", hash = "sha256:510c8b3c17a0ed9ac69850c0438dada3c9b82d9c4d589fcb62002a5a9cf3a866", size = 192571, upload-time = "2026-05-19T10:08:52.451Z" },
+    { url = "https://files.pythonhosted.org/packages/a0/f6/45bb4670bacf300fd2c7abadbfb3af376e5f1b6ae75fd9bc069891d15870/jiter-0.15.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7553333dd0930c104a5a0db8df72bf7219fe663d731383b576bb6ed6351c984d", size = 317151, upload-time = "2026-05-19T10:08:53.867Z" },
+    { url = "https://files.pythonhosted.org/packages/d7/68/ed635ad5acd7b73e454283083bbb7c8205ad10e88b0d9d7d793b09fe8226/jiter-0.15.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f2143ab06181d2b029eedcb6af3cebe95f11bbac62441781860f98ee9330a6a6", size = 341243, upload-time = "2026-05-19T10:08:55.383Z" },
+    { url = "https://files.pythonhosted.org/packages/5d/db/3ff4176b817b8ea33879e71e13d8bc2b0d481a7ed3fe9e080f333d415c16/jiter-0.15.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6eac374c5c975709b69c10f09afd199df74150172156ad10c8d4fd785b7da995", size = 363629, upload-time = "2026-05-19T10:08:56.928Z" },
+    { url = "https://files.pythonhosted.org/packages/ab/24/5f8270e0ba9c883582f96f722f8a0b58015c7ce1f8c6d4571cf394e99b6b/jiter-0.15.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b3b3b775e33d3bfaec9899edc526ae97b0da0bf9d071a46124ba419149a414f8", size = 456198, upload-time = "2026-05-19T10:08:58.618Z" },
+    { url = "https://files.pythonhosted.org/packages/45/5b/76fc02b0b5c54c3d18c60653156e2f76fde1816f9b4722db68d6ee2c897e/jiter-0.15.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eda3071db3346334beae1360b46da4606da57bf3528c167b3c38533afaf9f2c5", size = 373710, upload-time = "2026-05-19T10:09:00.151Z" },
+    { url = "https://files.pythonhosted.org/packages/c4/52/4310821b0ea9277994d3e1f49fc6a4b34e4800caebacb2c0af81da59a454/jiter-0.15.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c6694a173ecabc12eb60efbc0b474464ead1951ff65cd8b1e72100715c64512b", size = 349901, upload-time = "2026-05-19T10:09:01.621Z" },
+    { url = "https://files.pythonhosted.org/packages/93/fe/67648c35b3594fba8854ac64cc8a826d8bcd18324bbdb53d77697c60b6ef/jiter-0.15.0-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:a254e10b593624d230c365b6d616b22ca0ad65e63a16e6631c2b3466022e6ba8", size = 352438, upload-time = "2026-05-19T10:09:03.216Z" },
+    { url = "https://files.pythonhosted.org/packages/cb/28/0a1879d07ad6b3e025a2750027363452ced93c2d16d1c9d4b153ffd51c91/jiter-0.15.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d8d2955167274e15d79a7a020afdd9b39c990eb80b2d89fca695d92dcfdd38ec", size = 388152, upload-time = "2026-05-19T10:09:04.741Z" },
+    { url = "https://files.pythonhosted.org/packages/c1/78/46c6f6b56ba85c90021f4afd72ed42f691f8f84daacb5fe27277070e3858/jiter-0.15.0-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:acf4ee4d1fc55917239fe72972fb292dd773055d05eb040d36f4326e02cc2c0e", size = 517707, upload-time = "2026-05-19T10:09:06.231Z" },
+    { url = "https://files.pythonhosted.org/packages/ca/cb/720662d4c88fcad606e826fef5424365527ba43ce4868a479aed8f8c507e/jiter-0.15.0-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:e7196e56f1cd69af1dbb07dff02dcfb260a50b45a82d409d92a06fedb32473b5", size = 548241, upload-time = "2026-05-19T10:09:08.093Z" },
+    { url = "https://files.pythonhosted.org/packages/60/e3/935b8034fd143f21125c87d51404a9e0e1449186a494405721ff5d1d695e/jiter-0.15.0-cp314-cp314t-win32.whl", hash = "sha256:7f6163c0f10b055245f814dcc59f4818da60dfe72f3e72ab89fc24b6bd5e9c52", size = 207950, upload-time = "2026-05-19T10:09:09.616Z" },
+    { url = "https://files.pythonhosted.org/packages/93/59/984fd9ece895953dad3e0880a650e766f5a2da2c5514f0eafdaaabbeb5f9/jiter-0.15.0-cp314-cp314t-win_amd64.whl", hash = "sha256:980c256edb05b78a111b99c4de3b1d32e31634b867fd1fc2cf726e7b7bba9854", size = 200055, upload-time = "2026-05-19T10:09:11.367Z" },
+    { url = "https://files.pythonhosted.org/packages/0e/a4/cf8d779feb133a27a2e3bc833bccb9e13aa332cdf820497ebf72c10ce8c3/jiter-0.15.0-cp314-cp314t-win_arm64.whl", hash = "sha256:66b1880df2d01e206e8339769d1c7c1753bcb653efd6289e203f6f24ebada0c0", size = 191244, upload-time = "2026-05-19T10:09:12.74Z" },
+    { url = "https://files.pythonhosted.org/packages/65/43/1fc62172aa98b50a7de9a25554060db510f85c89cfbed0dfe13e1907a139/jiter-0.15.0-graalpy311-graalpy242_311_native-macosx_10_12_x86_64.whl", hash = "sha256:411fa4dfa5a7ae3d11491027ffb9beadec3996010a986862db70d91abba1c750", size = 305585, upload-time = "2026-05-19T10:09:35.995Z" },
+    { url = "https://files.pythonhosted.org/packages/e8/c4/dd58fcd9e2df83666e5c1c1347bef58ce919cd8efc3ffa38aeea62ce493b/jiter-0.15.0-graalpy311-graalpy242_311_native-macosx_11_0_arm64.whl", hash = "sha256:2b0074e2f56eb2dacca1689760fd2852a068f85a0547a157b82cb4cafeb6768b", size = 306936, upload-time = "2026-05-19T10:09:37.435Z" },
+    { url = "https://files.pythonhosted.org/packages/39/86/b695e16f1180c07f43ea98e73ecd21cf63fa2e1b0c1103739013784d11ae/jiter-0.15.0-graalpy311-graalpy242_311_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:913d02d29c9606643418d9ccfc3b72492ab25a6bf7889934e09a3490f8d3438b", size = 342453, upload-time = "2026-05-19T10:09:39.294Z" },
+    { url = "https://files.pythonhosted.org/packages/34/56/55d76614af37fe3f22a3347d1e410d2a15da581997cb2da499a625000bb5/jiter-0.15.0-graalpy311-graalpy242_311_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b15d3ec9b0449c40e85319bdb4caa8b77ab526e74f5532ed94bec15e2f66822c", size = 345606, upload-time = "2026-05-19T10:09:40.727Z" },
+    { url = "https://files.pythonhosted.org/packages/73/38/505941b2b092fd5bbbd60a52a880db1173f1690ae6751bed3af1c9ddcb4e/jiter-0.15.0-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:631f13a3d04e97d4e083993b10f4b99530e3a10d953e2eb5e196b7dc7f812ce0", size = 303769, upload-time = "2026-05-19T10:09:42.203Z" },
+    { url = "https://files.pythonhosted.org/packages/e7/95/a06692b29e77473f286e1ec1f426d3ca44d7b5843be8ad21d7a5f3fcdcc0/jiter-0.15.0-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:b6c0ffae686c39bf3737be60793783267628783ea42545632c10b291105aee45", size = 305128, upload-time = "2026-05-19T10:09:43.657Z" },
+    { url = "https://files.pythonhosted.org/packages/23/85/7270d7ad41d6061a25b950c6bf91d638bd9aacb113200a8c8d57a055fd67/jiter-0.15.0-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d54fb5b31dea401a41af3f8a7d2512e9b6a6a005491e6166c7e4ffab9639a9c", size = 340459, upload-time = "2026-05-19T10:09:45.452Z" },
+    { url = "https://files.pythonhosted.org/packages/c8/8d/302cb2057b7513327b4d575cff6b1d066ee6431a5357fc3f8867cd684406/jiter-0.15.0-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:54d5d6090cdc1b7c9e780dfb04949a990adb1e301a2fc0bbcee7de4638d33f9a", size = 344469, upload-time = "2026-05-19T10:09:46.864Z" },
+]
+
 [[package]]
 name = "jmespath"
 version = "1.1.0"
@@ -1818,6 +1981,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/03/99/33c7d78a3fb70d545fd5411ac67a651c81602cc09c9cf0df383733f068c5/jsonpath_ng-1.8.0-py3-none-any.whl", hash = "sha256:b8dde192f8af58d646fc031fac9c99fe4d00326afc4148f1f043c601a8cfe138", size = 67844, upload-time = "2026-02-28T00:53:19.637Z" },
 ]
 
+[[package]]
+name = "logfire-api"
+version = "4.35.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/d1/25/016b7d5e0433ae28d8a8bcb18681c48da2a0cdbf0ca8f7b2acdac2f16f4a/logfire_api-4.35.0.tar.gz", hash = "sha256:dcc073c7e337b0005f63075cf89951bacf00944b7c7420c2422b18133c8d2605", size = 83091, upload-time = "2026-06-02T14:55:58.573Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/d2/3a/861f2040b251aa12b653b104c314b7d140cb44a6ab19cb141535aa72beb9/logfire_api-4.35.0-py3-none-any.whl", hash = "sha256:c8eb8f49c261c09b3d815b22ecba1c5224e8ba9aa9b546b0afcdb13a89fa6bfe", size = 131026, upload-time = "2026-06-02T14:55:55.252Z" },
+]
+
 [[package]]
 name = "lupa"
 version = "2.8"
@@ -1999,6 +2171,23 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/40/44/3ee09a5b60cb44c4f2fbc1c9015cfd6ff5afc08f991cab295d3024dcbf2d/lxml-6.1.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:7da13bb6fbadfafb474e0226a30570a3445cfd47c86296f2446dafbd77079ace", size = 3508860, upload-time = "2026-04-18T04:32:48.619Z" },
 ]
 
+[package.optional-dependencies]
+html-clean = [
+    { name = "lxml-html-clean" },
+]
+
+[[package]]
+name = "lxml-html-clean"
+version = "0.4.4"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "lxml" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/9a/a4/5c62acfacd69ff4f5db395100f5cfb9b54e7ac8c69a235e4e939fd13f021/lxml_html_clean-0.4.4.tar.gz", hash = "sha256:58f39a9d632711202ed1d6d0b9b47a904e306c85de5761543b90e3e3f736acfb", size = 23899, upload-time = "2026-02-27T09:35:52.911Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/d9/76/7ffc1d3005cf7749123bc47cb3ea343cd97b0ac2211bab40f57283577d0e/lxml_html_clean-0.4.4-py3-none-any.whl", hash = "sha256:ce2ef506614ecb85ee1c5fe0a2aa45b06a19514ec7949e9c8f34f06925cfabcb", size = 14565, upload-time = "2026-02-27T09:35:51.86Z" },
+]
+
 [[package]]
 name = "markdown-it-py"
 version = "4.2.0"
@@ -2450,6 +2639,25 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/b1/dc/d358a16a6fec86cf736b8fbe67386044b3fa2aded1a80cff90e836799301/numpy-2.4.5-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:40c71d50a4da1a7c317af419461052d3911a5770bfc5fd55baf52cc45e7a2c20", size = 12504085, upload-time = "2026-05-15T20:25:16.667Z" },
 ]
 
+[[package]]
+name = "openai"
+version = "2.41.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "anyio" },
+    { name = "distro" },
+    { name = "httpx" },
+    { name = "jiter" },
+    { name = "pydantic" },
+    { name = "sniffio" },
+    { name = "tqdm" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/40/36/4c926a91554483977608951360c18c2e911592785eb87a6437813f6123f7/openai-2.41.1.tar.gz", hash = "sha256:23d617a0432457ad844973bee8f540be9da90894f7c5686852d2d365da058f57", size = 783584, upload-time = "2026-06-10T16:10:37.667Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/20/74/925d7b3892927e9804aaf58d374a45dc28e4420ff90e992272b77286343e/openai-2.41.1-py3-none-any.whl", hash = "sha256:a939565f350cb7443cb843b801b88c716ac8024b492fb94ca269d5f6b1bbefd6", size = 1353380, upload-time = "2026-06-10T16:10:35.756Z" },
+]
+
 [[package]]
 name = "opentelemetry-api"
 version = "1.41.1"
@@ -2945,6 +3153,31 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/fd/7b/122376b1fd3c62c1ed9dc80c931ace4844b3c55407b6fb2d199377c9736f/pydantic-2.13.4-py3-none-any.whl", hash = "sha256:45a282cde31d808236fd7ea9d919b128653c8b38b393d1c4ab335c62924d9aba", size = 472262, upload-time = "2026-05-06T13:43:02.641Z" },
 ]
 
+[[package]]
+name = "pydantic-ai-slim"
+version = "1.106.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "exceptiongroup", marker = "python_full_version < '3.11'" },
+    { name = "genai-prices" },
+    { name = "griffelib" },
+    { name = "httpx" },
+    { name = "opentelemetry-api" },
+    { name = "pydantic" },
+    { name = "pydantic-graph" },
+    { name = "typing-inspection" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/2b/45/2afc9100a7c370d8ac37bdfccfb54f46fc99da3bdce63f07c32c37807ebc/pydantic_ai_slim-1.106.0.tar.gz", hash = "sha256:e265598c8ee0e903ebb02d0494bb232be4cc8aa463ba1a55aa743cf34135dacf", size = 773504, upload-time = "2026-06-05T01:29:09.129Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/72/d9/a2785c576e3519a72a5bbc0e12027c542b265ef6eea1aa72b9c440ac2531/pydantic_ai_slim-1.106.0-py3-none-any.whl", hash = "sha256:0dd7a99ea3fa89b490098406c2240ba7d75c327eea094c3fd057dd7aa9f3d163", size = 957617, upload-time = "2026-06-05T01:28:59.979Z" },
+]
+
+[package.optional-dependencies]
+openai = [
+    { name = "openai" },
+    { name = "tiktoken" },
+]
+
 [[package]]
 name = "pydantic-core"
 version = "2.46.4"
@@ -3061,6 +3294,21 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/4b/2d/69abac8f838090bbecd5df894befb2c2619e7996a98ddb949db9f3b93225/pydantic_core-2.46.4-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:d51026d73fcfd93610abc7b27789c26b313920fcfb20e27462d74a7f8b06e983", size = 2193071, upload-time = "2026-05-06T13:38:08.682Z" },
 ]
 
+[[package]]
+name = "pydantic-graph"
+version = "1.106.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "httpx" },
+    { name = "logfire-api" },
+    { name = "pydantic" },
+    { name = "typing-inspection" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/42/9b/dd6826cf21eedd96a7482302be51ba6087095acbe828362135de2a505092/pydantic_graph-1.106.0.tar.gz", hash = "sha256:55afa33df4f699ed5c1185f81b6a06e2161958f1aa0c20742b2dae5745e84cce", size = 62567, upload-time = "2026-06-05T01:29:11.833Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/9b/e9/0058f0b98f5992e715a0a50128f6c3cc7946cc242d471f6e850efdf03f0c/pydantic_graph-1.106.0-py3-none-any.whl", hash = "sha256:e6bb61aef0fdb49185a81142d311f94fc3315329345471d12cab85ab5845221f", size = 80099, upload-time = "2026-06-05T01:29:04.219Z" },
+]
+
 [[package]]
 name = "pydantic-settings"
 version = "2.14.1"
@@ -3407,6 +3655,127 @@ hiredis = [
     { name = "hiredis" },
 ]
 
+[[package]]
+name = "regex"
+version = "2026.5.9"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/dc/0e/49aee608ad09480e7fd276898c99ec6192985fa331abe4eb3a986094490b/regex-2026.5.9.tar.gz", hash = "sha256:a8234aa23ec39894bfe4a3f1b85616a7032481964a13ac6fc9f10de4f6fca270", size = 416074, upload-time = "2026-05-09T23:15:19.37Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/fe/ed/0ad2c8edf634918eb4484365d3819fa7bd7f58daf807fe7fb21812c316e5/regex-2026.5.9-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a9e1328e17c84c1a5d22ec9f785ecef4a967fab9a42b6a8dc3bcbebd0a0c9e44", size = 489438, upload-time = "2026-05-09T23:11:29.374Z" },
+    { url = "https://files.pythonhosted.org/packages/89/a9/4ed972ad263963b860b7c3e86e0e1bcc791def47b43b8c8efe57e710f139/regex-2026.5.9-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bfe1ce50cbfb569d74e1e4337da6468961f31dbea55fd85aa5de59c0947a805a", size = 291270, upload-time = "2026-05-09T23:11:33.254Z" },
+    { url = "https://files.pythonhosted.org/packages/16/81/075930d9fa28c4ea1f53398dd015ee7c882f623539759113cda1257f4b82/regex-2026.5.9-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:15ee42209947f4ca045412eae98416317238163618ace2a8e54f99586a466733", size = 289198, upload-time = "2026-05-09T23:11:35.769Z" },
+    { url = "https://files.pythonhosted.org/packages/d4/c8/5cdfbf0b5dc6599e1b6131eff43262e5275d4ec3469ce10216061659aadb/regex-2026.5.9-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b4bb445ff3f725f59df8f6014edb547ee928ec7023a774f6a39a3f953038cbb2", size = 784765, upload-time = "2026-05-09T23:11:37.689Z" },
+    { url = "https://files.pythonhosted.org/packages/cd/ca/ae5fd6edc59b7f84b904b31d6ec39a860cbcecd10f64bd5a062ca83a4864/regex-2026.5.9-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:446ddd671e43ab535810c4b21cff7104945c701d4a14d1e6d1cd6f4e445a8bea", size = 852115, upload-time = "2026-05-09T23:11:39.973Z" },
+    { url = "https://files.pythonhosted.org/packages/f6/ce/a91cf555afb51f3b74a182e24ba073b91ea7bb64592fc4b315c111bb19fd/regex-2026.5.9-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:7b92817338591505f282cf3864c145244b1edcf5381d237038df955001091538", size = 899503, upload-time = "2026-05-09T23:11:42.48Z" },
+    { url = "https://files.pythonhosted.org/packages/55/7f/725a0a2b245a4cf0c4bab29d0e97c74285d94136a65d1b55a6459a583502/regex-2026.5.9-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d6b8a143aca6c39b446ea8092cde25cc8fe9304d4f5fecfbc1a9dbb0282703c2", size = 794093, upload-time = "2026-05-09T23:11:44.681Z" },
+    { url = "https://files.pythonhosted.org/packages/e3/2a/996efbd59ce6b5d4a09e3af6180ceb62af171f4a9a6fb557d2f0ae0d462b/regex-2026.5.9-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0f03aa6898aaaac4592479821df16e68e8d0e29e903e65d8f2dfb2f19028a989", size = 786234, upload-time = "2026-05-09T23:11:46.882Z" },
+    { url = "https://files.pythonhosted.org/packages/4b/0a/8731e8b8806174c9cdd5903f80a14990331c1f42fc4209b540952e9e010d/regex-2026.5.9-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ed457d8e98ae812ed7732bef7bf78de78e834eae0372a74e23ca90ef21d910f9", size = 769895, upload-time = "2026-05-09T23:11:49.324Z" },
+    { url = "https://files.pythonhosted.org/packages/9a/0b/932473194bd563f342a412ae2ffbbd6da608306a2bc4e99249a41c2b0b92/regex-2026.5.9-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:71b61c5bfe1c806332defc42ad6c780b3c55f661986d7f40283a3a88274b4c00", size = 774991, upload-time = "2026-05-09T23:11:51.261Z" },
+    { url = "https://files.pythonhosted.org/packages/98/80/9523d196010031df25f7177ee0a467efbee436324038e5d99def17a57515/regex-2026.5.9-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:3b1e39888c5e0c7d92cea4fc777396c4a90363b05de75d02eb459a4752200808", size = 848790, upload-time = "2026-05-09T23:11:53.232Z" },
+    { url = "https://files.pythonhosted.org/packages/3c/07/56987b35e89edf47e4a38cf2845aeee476bfa688a6bdbd3e820cda461dc1/regex-2026.5.9-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:6ba42b2e7e7f46cf68cc6a5ca36fa07959f9bbd9c6bdcc47b6ee76549a590248", size = 757679, upload-time = "2026-05-09T23:11:55.82Z" },
+    { url = "https://files.pythonhosted.org/packages/04/2a/ff713fff0c566507c06a4ce2dc0ae8e7eeebc88811a95fc81cf1e7d534dd/regex-2026.5.9-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:c010eb8caca74bdb40c07498d7ece26b4428fd3f04aa8a72c9ac6f79e8faaac6", size = 837116, upload-time = "2026-05-09T23:11:57.934Z" },
+    { url = "https://files.pythonhosted.org/packages/77/90/df6d982b03e3614785c6937ba51b57f6733d97d2ee1c9bc7531dbfab3a54/regex-2026.5.9-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:a6a563446a41adc451393dc6b8e6ad87979efaee3c8738690a8d1b08ebead1b4", size = 782081, upload-time = "2026-05-09T23:11:59.607Z" },
+    { url = "https://files.pythonhosted.org/packages/c7/8a/4e88a5f7c3e98489aac4dd23142723d907b2a595b4a6abcbacabefeded09/regex-2026.5.9-cp310-cp310-win32.whl", hash = "sha256:954cc214c04663ee6d266fc61739cad83054683048de65c5bd1d640ad28098ac", size = 266247, upload-time = "2026-05-09T23:12:01.116Z" },
+    { url = "https://files.pythonhosted.org/packages/6a/40/4b224cb0582b2dca1786726e6cdabe26abbf757d7f6718332f186da155d2/regex-2026.5.9-cp310-cp310-win_amd64.whl", hash = "sha256:b310768746dd314ea6e2ff4cc89ef215426813396ff4e94ee8e6f7096c8b6e03", size = 278416, upload-time = "2026-05-09T23:12:03.2Z" },
+    { url = "https://files.pythonhosted.org/packages/12/4d/014fbe803204cab0947ee428f09f658a29632053dde1d3c6176bb4f0fd4c/regex-2026.5.9-cp310-cp310-win_arm64.whl", hash = "sha256:19c16ceb4a267a8789e25733e583983eeab9f0f8664e66b0bd1c5d21f14c2d4b", size = 270413, upload-time = "2026-05-09T23:12:04.649Z" },
+    { url = "https://files.pythonhosted.org/packages/c2/dc/c1f2df4027e82fc54b5a473e4b250f5139faca49a0fbe29a48668d228f34/regex-2026.5.9-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ccf5249114cc3e772ecdd88a98a86eca0fd74c61ce32a94743758c083fc05d48", size = 489445, upload-time = "2026-05-09T23:12:06.111Z" },
+    { url = "https://files.pythonhosted.org/packages/03/d2/59f01110660081cce9c0bc30ebd0b5ee250dacf658e3248ed92f01e0e8ee/regex-2026.5.9-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:46f1326ca6e65b0879d23ca302c0f2415aad42ff0309b9c818e7949fe19a41d8", size = 291271, upload-time = "2026-05-09T23:12:07.731Z" },
+    { url = "https://files.pythonhosted.org/packages/58/b6/14b2c84ff90ddb370c81d27503f4a0fcf071496416f4855f6cc8c5d81c35/regex-2026.5.9-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ef31cbfe458e21c6122ba8150ff060e0c7789ed0d26eb423f25472584920b555", size = 289212, upload-time = "2026-05-09T23:12:09.266Z" },
+    { url = "https://files.pythonhosted.org/packages/03/d0/4db86529117320de0c84afd90e70bb47434625875e34fcef9d8c127c5b16/regex-2026.5.9-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:992604d02e6d9c6d786c24a706a71ecffe1020fc1ef264044474cd81fa2c3919", size = 792310, upload-time = "2026-05-09T23:12:11.416Z" },
+    { url = "https://files.pythonhosted.org/packages/07/78/fe4800cd322f862ecffd2d553409b20d80650e5ed71b9d178f853d020b82/regex-2026.5.9-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c9411dd64ca95477225734a93dfc8583b51916b8d5942f99d6cac21e09965451", size = 861721, upload-time = "2026-05-09T23:12:13.681Z" },
+    { url = "https://files.pythonhosted.org/packages/b5/d0/b3618a895dd8feb897c61bb2954edd265e1767d82a01d53065d5871127a3/regex-2026.5.9-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3dd4a3ff360dfb836fecdb93a4598f9d6e2ac81e3e397125145c6221bf58cf4c", size = 906460, upload-time = "2026-05-09T23:12:15.443Z" },
+    { url = "https://files.pythonhosted.org/packages/33/6f/1481597e859ef19508b345eec4afd1416ed6e6b459c75a64026ef193aecf/regex-2026.5.9-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2a661a7d270a61f7cf460caee8b9fa2d5ef9e5c681234bcb9e0fe14f488e7dfc", size = 799843, upload-time = "2026-05-09T23:12:16.892Z" },
+    { url = "https://files.pythonhosted.org/packages/73/59/955734c803f59108deccba3597ae440c76b62a652733c0006e6243758420/regex-2026.5.9-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f079e50a0d3cc3cd5091fa9ff45869a2e6b2cd35895731edafb0327901a8d86d", size = 773610, upload-time = "2026-05-09T23:12:19.127Z" },
+    { url = "https://files.pythonhosted.org/packages/68/8f/70c04a236d651c81881dac42ef8538bddda6121434509d0a22d9e601503b/regex-2026.5.9-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:4ebe8f0b5ec5a5024dc4a4c59f444c4e9afc5f2abdbb8962065b75d27fb971f9", size = 781645, upload-time = "2026-05-09T23:12:20.806Z" },
+    { url = "https://files.pythonhosted.org/packages/1d/96/05c7434d88185e5d27fe54aeb74df86bd77cd79f52f0b4eae54faa8fea70/regex-2026.5.9-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:97cf3bc1b7d7d2306772ec07366c80d9df00ff79e79cea32898883a646d2fae2", size = 854473, upload-time = "2026-05-09T23:12:22.465Z" },
+    { url = "https://files.pythonhosted.org/packages/4e/c1/6e3d8202d981f3117004bf341ee74893ba4ba8a9fbaf4b94615846550a08/regex-2026.5.9-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:0f9eede6a5cbdc02d4978090186390936e1776a7d1359b21e41014c609880bcf", size = 763311, upload-time = "2026-05-09T23:12:24.351Z" },
+    { url = "https://files.pythonhosted.org/packages/93/c7/e7737f1526b3fb32bd4c337fd6c71c3ebb5c8296fc34d11197e0955d2e35/regex-2026.5.9-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:01f0f5f55f4b64dacec85dc116d3c05fd23ad3ff037bbc73a2085775953c2611", size = 844593, upload-time = "2026-05-09T23:12:26.341Z" },
+    { url = "https://files.pythonhosted.org/packages/a5/27/0daffb1a535bb39f422c3d200f4ab023c71110ad66a32b366bee708baba0/regex-2026.5.9-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1268eddd8486dc561d08eee1156e40aa3a8fe10f4bdec8fa653b455fcbffd12c", size = 789167, upload-time = "2026-05-09T23:12:27.975Z" },
+    { url = "https://files.pythonhosted.org/packages/ce/fc/294fe4fac4f2ed67207b17471815870c1c45b3a489e08e0ac96daea16ef6/regex-2026.5.9-cp311-cp311-win32.whl", hash = "sha256:8676474c07469d6f33dd1085ca2cd45f65785f32518f2b20e36d9953ca07f994", size = 266249, upload-time = "2026-05-09T23:12:30.141Z" },
+    { url = "https://files.pythonhosted.org/packages/d0/b0/8dce459f6245bcf8f6e9f23ac9569f1a0f15c131cc0745e82b43226204cf/regex-2026.5.9-cp311-cp311-win_amd64.whl", hash = "sha256:246de9d60aa3f8538b519834dd95cbf276ea263d6a7bd5a3666dc3fa0230505b", size = 278423, upload-time = "2026-05-09T23:12:31.676Z" },
+    { url = "https://files.pythonhosted.org/packages/db/8d/f9aeff6ad63a3ef720386f2907e6d34a35a510a6e498ebad28b0fb3f6ab6/regex-2026.5.9-cp311-cp311-win_arm64.whl", hash = "sha256:d726ca3f0d76969bf1e8e477d160d3d666bbf999f6860bd314889e5345782046", size = 270420, upload-time = "2026-05-09T23:12:33.194Z" },
+    { url = "https://files.pythonhosted.org/packages/50/9b/6550044bc44e17c84d312c031c2ec42fbdb6a4ec4e29093be3a172d08772/regex-2026.5.9-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:57eeeb05db7979413dec5438f2db21d7ecbba787cde7a711df1a6f6df672aa06", size = 490451, upload-time = "2026-05-09T23:12:34.72Z" },
+    { url = "https://files.pythonhosted.org/packages/1e/95/fc7ba4303b5a0f92446a12ee6778ef2c6c799233f5060042a31bf390cfe9/regex-2026.5.9-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:398c521292f4c7fb807001dcd54694d3a1fcafc179a36ad9cc56f98df85930b6", size = 292112, upload-time = "2026-05-09T23:12:36.285Z" },
+    { url = "https://files.pythonhosted.org/packages/54/4b/ee27938d1b2c443e89a9a10e00d2d19aa5ee300cd3d61140644e93bb083e/regex-2026.5.9-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f7a7c26137296beba7784de6eba69c6a93a63ccebc385e4962fe67e267a91225", size = 289599, upload-time = "2026-05-09T23:12:38.089Z" },
+    { url = "https://files.pythonhosted.org/packages/d8/dd/ba103dc19614e25f3880800ca67ce093d6e21b325d72b8383c7bf906e9fa/regex-2026.5.9-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6441cc660d76107934a09c22167200839a0e89604a6297f78a974e66e931d2c0", size = 796732, upload-time = "2026-05-09T23:12:40.062Z" },
+    { url = "https://files.pythonhosted.org/packages/cf/e7/f035b4fd858b050b0080bf302968dc0f59ba34e391872d54936758e6844e/regex-2026.5.9-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:91328f1c23d47595ca3ef0a7557fa129c5a23404b775c770697d2f35b33e0107", size = 865440, upload-time = "2026-05-09T23:12:42.059Z" },
+    { url = "https://files.pythonhosted.org/packages/0a/51/8cd301ecc899aea28124357f729f4272f44de7806fc7ca02490bfbe253e8/regex-2026.5.9-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:93a7860539414dddaefba2b40f8771765ae17949d4c7182b876ce429e11a8309", size = 912329, upload-time = "2026-05-09T23:12:44.373Z" },
+    { url = "https://files.pythonhosted.org/packages/cc/1e/3fbe2fa1e8cebd62f3bb7d3321cff1640aca2e240b51d9bd624aad949260/regex-2026.5.9-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dd2810d22146b6d838acc5ec15602cb6b47920aa4e33015df3868eedfd20bab8", size = 801239, upload-time = "2026-05-09T23:12:46.268Z" },
+    { url = "https://files.pythonhosted.org/packages/17/2f/6f6008682bf2cf98040a0d3153a8e557b6ab728d7713d045cee4ce544ab8/regex-2026.5.9-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:daff2bdbaf1d23e52fdff7c0b7bc2048b68f978df6a4d107ac981f94caef2e66", size = 777054, upload-time = "2026-05-09T23:12:48.051Z" },
+    { url = "https://files.pythonhosted.org/packages/19/2b/eee0d20a6842ba04df4b8847a920b57ef56853f14ef85405473e586b605a/regex-2026.5.9-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4eeb011098fcb77af513dcef521a3dbecbf8849b1e38940759d293b7a93f5026", size = 785098, upload-time = "2026-05-09T23:12:49.851Z" },
+    { url = "https://files.pythonhosted.org/packages/4a/98/6fc1e6410feefb92159edaed5041992bfe390e8d26c721865434acbca558/regex-2026.5.9-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:ea9c8ecfa1b73c73b626534d6626e5340d429630943672b8480724f44e84b962", size = 860095, upload-time = "2026-05-09T23:12:51.666Z" },
+    { url = "https://files.pythonhosted.org/packages/18/a3/bd855e0f2cb1a978ecf6fa6bb69632dd9c3f6ea3b81cde62fde14c9daec7/regex-2026.5.9-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:cd2846168eb9ee3c513902bc8225409cb1caab31d04728b145171fa1625d9621", size = 765762, upload-time = "2026-05-09T23:12:53.413Z" },
+    { url = "https://files.pythonhosted.org/packages/dc/66/0ae8c092e60b14c79d24f8e0b7f0aea5bfbffdcab00b5483d13404d3c3a5/regex-2026.5.9-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:39617fb0cde9c0e6306dc70e3bfc096f3da793219879f7ae7aa341a69fbdcf6d", size = 852100, upload-time = "2026-05-09T23:12:55.256Z" },
+    { url = "https://files.pythonhosted.org/packages/21/de/8dfde60fc1b21c946a893ba273403b72617edb261370cb1087099a83f088/regex-2026.5.9-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fd03c4f0e33280d15cae17159b899245d6b7c53d21def19b263b39655061f5ce", size = 789479, upload-time = "2026-05-09T23:12:57.573Z" },
+    { url = "https://files.pythonhosted.org/packages/c3/1c/bdcc98f9a4af4fdd166c74941174619ccff4726d3ce32faa8e9a2ecd38dd/regex-2026.5.9-cp312-cp312-win32.whl", hash = "sha256:164eba9b755ea6f244b0d881196fbc1fac09714e9782c9e2732b813142033c8e", size = 266699, upload-time = "2026-05-09T23:12:59.14Z" },
+    { url = "https://files.pythonhosted.org/packages/78/87/240d36864f9e48ace85f72e79ced97ceb7f27ce87739a947dcb834b4e6bc/regex-2026.5.9-cp312-cp312-win_amd64.whl", hash = "sha256:86f40a5d6444db30a125c9c9177e6b25dad981cbc37451fd838f145e6edac92e", size = 277783, upload-time = "2026-05-09T23:13:00.789Z" },
+    { url = "https://files.pythonhosted.org/packages/4f/b5/7b30f312b0669dff5beebe5b0989dc2d1a312b1a44fab852199c387a5b96/regex-2026.5.9-cp312-cp312-win_arm64.whl", hash = "sha256:96f5f58b54a063d7ea9dca08e1cf57bfe10499c4d579ee672da284f57f5f0070", size = 270513, upload-time = "2026-05-09T23:13:02.426Z" },
+    { url = "https://files.pythonhosted.org/packages/aa/da/797e91ecec6f84135da778ddce78c20e0af5d2a15c26f87a81bc3eadb6db/regex-2026.5.9-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:d626b84406444b165fc0ba981604edea39f0588ff1f92baa23fe50799ea9afdb", size = 490303, upload-time = "2026-05-09T23:13:04.382Z" },
+    { url = "https://files.pythonhosted.org/packages/44/da/bf30abaaa737b58f4a4b8c4a03659e02fd92092c822e0197ed9e0daab917/regex-2026.5.9-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:d7bdc0ab8f3dd7e1b4f9ab88634e13374669db86bb3c72e8292f07ae313f539f", size = 292019, upload-time = "2026-05-09T23:13:06.022Z" },
+    { url = "https://files.pythonhosted.org/packages/2d/e7/d0eaf5713828417b9e5648cf81fa9bacd4961f6ab98c380c2034f8716e35/regex-2026.5.9-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a8820737949116ffff55fe18f9fc644530063ba6ebfcb8314239416e78f1347c", size = 289468, upload-time = "2026-05-09T23:13:08.214Z" },
+    { url = "https://files.pythonhosted.org/packages/d3/9b/b3fdd62b003baa1a9b593cd8c8699c9651c2e80cc21a5c715707983c42d7/regex-2026.5.9-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:aa0fbdbac82cb3e4450d0ccde7d7a35607f4cb2dd9fba4b8b69bfaf8c9fa6aed", size = 796749, upload-time = "2026-05-09T23:13:10.573Z" },
+    { url = "https://files.pythonhosted.org/packages/d4/30/66ab84588765f5b4b271a9ca09ef7ce2b87caa95176ec3d2ad65d7bc4902/regex-2026.5.9-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:57e8915c7986aa33d25e4d3629cef711cd2863f2961b10409f0c04cb8b7d9020", size = 865445, upload-time = "2026-05-09T23:13:12.523Z" },
+    { url = "https://files.pythonhosted.org/packages/1a/89/f05169e8588aac365f35ffc7f3bc3184f095ef4cfded7cfaa3c7fd5dbd89/regex-2026.5.9-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:508f56a89ba9cb26e4168cbc37dbd60a28d82430a9e18ad1d25fe0883c314ca2", size = 912322, upload-time = "2026-05-09T23:13:14.281Z" },
+    { url = "https://files.pythonhosted.org/packages/30/e1/c93444052cf41581f3c884ab3fb5823daf0992f11cd4388d4275ca610558/regex-2026.5.9-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b6d189041f15691cfa2b6c4290448ec221244d225b3f5fe9e7771b34ffcdf6e2", size = 801269, upload-time = "2026-05-09T23:13:16.569Z" },
+    { url = "https://files.pythonhosted.org/packages/50/fe/0cf96b882f540e62e8b9956599798203d599c44cf4c77917ca27400ff69b/regex-2026.5.9-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e82db382b44d0111b22601c509c89f64434816c9e0eef9d1989cda8cc6ff1c04", size = 777085, upload-time = "2026-05-09T23:13:18.675Z" },
+    { url = "https://files.pythonhosted.org/packages/23/5c/d78d4924e7fc875557b9e9b768423925fdfaac5549d06da7810019a9bd26/regex-2026.5.9-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:2acfb48634f64996b57f90f39afa692ff362162722581921fe92239a59960f3c", size = 785153, upload-time = "2026-05-09T23:13:20.525Z" },
+    { url = "https://files.pythonhosted.org/packages/bf/e0/5214774090e7b4524dcea3e3c4aa74141d43043f8beb49c1599db1c8b53a/regex-2026.5.9-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:d29eebfc9525db68cad3c97eedd7f754fa265aa5cd0cf4f863b2421e1b48fc9f", size = 860164, upload-time = "2026-05-09T23:13:22.263Z" },
+    { url = "https://files.pythonhosted.org/packages/6e/e1/4a57a83350319b1271f0d7a249b8672513ed928b237a741631270de6caea/regex-2026.5.9-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:debb893095e944091c16e641a6e33c1b0f4cb61ab945ec5afbf53ce7068834d8", size = 765731, upload-time = "2026-05-09T23:13:24.277Z" },
+    { url = "https://files.pythonhosted.org/packages/12/f4/499e74a20c156fc75836ee04a72a38d1a063978f600937f9760467beb1b0/regex-2026.5.9-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:d659eee77986549c9ea45b861c7567e44d6287c3dc9a4565478853f7b9fe2ff6", size = 852062, upload-time = "2026-05-09T23:13:26.125Z" },
+    { url = "https://files.pythonhosted.org/packages/5b/92/7eebc0d0a01e78629695f342ba17e0deaff8fb45e79cc0d7b98287da6e3e/regex-2026.5.9-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:2efa205e6d98b24d1f3ab395c11aa15cdf10935bca283d0285e0499c284fba21", size = 789577, upload-time = "2026-05-09T23:13:27.814Z" },
+    { url = "https://files.pythonhosted.org/packages/05/a4/018e71f7d2ad48c1ebe6d3ae0026f9b7cb4802fd15c7cc02fdf724355102/regex-2026.5.9-cp313-cp313-win32.whl", hash = "sha256:f3844f134e834076677dd369976e9f5068679fcb8e50102fdf6b7ac96a3ec127", size = 266691, upload-time = "2026-05-09T23:13:29.549Z" },
+    { url = "https://files.pythonhosted.org/packages/e6/1d/861a93719fb9ee7dbfc3761b3797b7a3e112a5d42c6129459d2d741be9b5/regex-2026.5.9-cp313-cp313-win_amd64.whl", hash = "sha256:3527bb4942d2c14552155406cdedd906567456821848aed1cb4933a391bf5eca", size = 277747, upload-time = "2026-05-09T23:13:31.859Z" },
+    { url = "https://files.pythonhosted.org/packages/d9/c6/0a2436ae4da1ba76e51cb98943c6838a9a721faa40ebe2dce07694ae34e3/regex-2026.5.9-cp313-cp313-win_arm64.whl", hash = "sha256:56a33f191f17d8c417f99945ebdc1e691d3af9605d86ec68c7e54a57e3e17af6", size = 270500, upload-time = "2026-05-09T23:13:33.525Z" },
+    { url = "https://files.pythonhosted.org/packages/e8/e9/d21346f7b60ed58789371358ed66b09d00f832e1bd7c06e55d9da5679882/regex-2026.5.9-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:01f28d868834624c934b8d2e0aa1c8341337e37831f4a012f18a5afcba4cbaf3", size = 494172, upload-time = "2026-05-09T23:13:35.935Z" },
+    { url = "https://files.pythonhosted.org/packages/c4/43/fd1177a2032037c681baecdb3422ee4e1424aec4e4f470ef47793d325274/regex-2026.5.9-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:48036f6374aaa79eb3b754ec29c61d1c6b1606749d705a13f8854fa2539671f6", size = 293952, upload-time = "2026-05-09T23:13:38.307Z" },
+    { url = "https://files.pythonhosted.org/packages/f2/7d/9fbf919768368d3f8a4f6c692cf2aa61e482b2b81ec6a298ace4cbf02480/regex-2026.5.9-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b96350aa424e79d4fd6b567b344dcbe2b2d6bfc48dfe7717587e1fa6d43da6ff", size = 292314, upload-time = "2026-05-09T23:13:40.353Z" },
+    { url = "https://files.pythonhosted.org/packages/e2/6c/e41bfeecb589716843e7c4df09ba46ff2a42961457afece19059d85caeef/regex-2026.5.9-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8f3af7a4903c5c04a11a196a5aa75cdd7dd3f8508132f9fb3259d9f5908e3b88", size = 811681, upload-time = "2026-05-09T23:13:42.543Z" },
+    { url = "https://files.pythonhosted.org/packages/87/83/a5c1c525fba0aa656e88ad0face0b1829788ef4c2fb6b26df58aa1151b84/regex-2026.5.9-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7e87577720152d2caae19fe2baaf1f8d5ca12091e9e229f03915c37d1e4b9178", size = 871135, upload-time = "2026-05-09T23:13:44.326Z" },
+    { url = "https://files.pythonhosted.org/packages/18/d4/80882e799e440dd878b0979cbebf8fa4d54624a332c83037c7a701649e3f/regex-2026.5.9-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:c8b9b9d294cfea3cd19c718ade7cc93492b2c4991abd9a68d0b3477ae6d8e100", size = 917265, upload-time = "2026-05-09T23:13:47.295Z" },
+    { url = "https://files.pythonhosted.org/packages/ae/ff/8db60211e2286e396aad7dc7725356c502bff0901ea05bd6cdc2e1a042b9/regex-2026.5.9-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:728d8bfd28a8845c8b6bc5dc7ce010453d206396786c0765c2740cb65f37791e", size = 816311, upload-time = "2026-05-09T23:13:49.885Z" },
+    { url = "https://files.pythonhosted.org/packages/4c/47/742ef579c61730f8d268e5cf1f9ce0e37e2ea041ad0f5644724f2378e463/regex-2026.5.9-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:7e30b874d341fac767d7df5a0870540541c2c054b80cfaac116e8d367a8a7ff2", size = 785498, upload-time = "2026-05-09T23:13:52.25Z" },
+    { url = "https://files.pythonhosted.org/packages/7f/ab/cb0999802dcb0fb95b1ab005e8d4163d8afdd67efc2cb6b6630ac13f8cb1/regex-2026.5.9-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:fd190e88a895a8901325fad284a3f74ea52b1da8525b76cc811fa9b1edf0ce2b", size = 801348, upload-time = "2026-05-09T23:13:54.127Z" },
+    { url = "https://files.pythonhosted.org/packages/7d/62/8ca59a24c55bc34d166eefaf3717bd77772f329fdbf984d86581e0a3571c/regex-2026.5.9-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:8e76e8161ad00694cfce6767d5dea860c6391ac5b83e5c3a39661e696f11fc7e", size = 866493, upload-time = "2026-05-09T23:13:56.067Z" },
+    { url = "https://files.pythonhosted.org/packages/8d/3d/30f2ae62cef3278bb5bb821f467277a55fb73f01032cf85997e15e8289a8/regex-2026.5.9-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:ddda5340e6c01a293027dd46232fa79eaff1b48058ce7a98f572b6445b088041", size = 772811, upload-time = "2026-05-09T23:13:57.867Z" },
+    { url = "https://files.pythonhosted.org/packages/d8/ae/7d2089bcd78ad0c0161bc684339df50032acb438a7bd3305e7ddb1193cec/regex-2026.5.9-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:205109e96b3cf5adf8f4cd62bedde9487feb282b9497a3535451e5a24cd706a0", size = 856584, upload-time = "2026-05-09T23:13:59.679Z" },
+    { url = "https://files.pythonhosted.org/packages/a9/29/92ff47f75990131ea4f24ba17819e5a9d141e10819807e09addd73409af6/regex-2026.5.9-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:dfbe4579b9f08036aa7d101d1835437a20783574ac66327e6b29b4018a138081", size = 803453, upload-time = "2026-05-09T23:14:01.978Z" },
+    { url = "https://files.pythonhosted.org/packages/04/99/eff29f1037dcab36702c9ee5d6858cf1ce2336ea8ea2987f64245b99ea5e/regex-2026.5.9-cp313-cp313t-win32.whl", hash = "sha256:ed2c9e8068b614c574d8d30e543d617cf5379b0535d46f97ef00e904745a08b5", size = 269951, upload-time = "2026-05-09T23:14:03.661Z" },
+    { url = "https://files.pythonhosted.org/packages/0e/9d/8870b8981d27b22cda77bb26a5ac7ebfa9c7d9e0dea195a834a82380e748/regex-2026.5.9-cp313-cp313t-win_amd64.whl", hash = "sha256:b46b0f094dc1d3b90356c85a0bd2c9bafc4a6a190b9d6f8ddd5a033b6e088ed4", size = 281240, upload-time = "2026-05-09T23:14:05.56Z" },
+    { url = "https://files.pythonhosted.org/packages/72/b1/3379415e8f135c13ac551353397cc4fe97b4978f3cac73c5fcbcded548b8/regex-2026.5.9-cp313-cp313t-win_arm64.whl", hash = "sha256:872acc074bd29ffc9913ecdfedf6ea77502312ca44a4aa0d3779089c6069d8de", size = 272383, upload-time = "2026-05-09T23:14:07.843Z" },
+    { url = "https://files.pythonhosted.org/packages/13/3e/9c3cd292d8808b3645a2ce517e200179b6d0e903f176300bd8b542e14de5/regex-2026.5.9-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:1bd7587a2948b4085195d5a3374eaf4a425dc3e55784c038175355ecf3bbbf8a", size = 490376, upload-time = "2026-05-09T23:14:09.64Z" },
+    { url = "https://files.pythonhosted.org/packages/60/70/d43ee8a2ca0a8b68d167f21658b85520ac0574617c7f320367c5047f7556/regex-2026.5.9-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:dea2e88e1cce4522496cce630e11e67b98b7076620bc4336c3f674bc21a375f4", size = 291964, upload-time = "2026-05-09T23:14:11.424Z" },
+    { url = "https://files.pythonhosted.org/packages/21/91/9d50b433828d8e74196904e168a43abf1e6e88b2a15d47ed742456720c37/regex-2026.5.9-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:2099f7e7ff7b6aa3192312650a56e91cc091e49d50b04e4f6f8b6e28b3b27f1c", size = 289682, upload-time = "2026-05-09T23:14:13.123Z" },
+    { url = "https://files.pythonhosted.org/packages/3e/d2/b835e3cafbb9d977736912436259ff551d60919f7d7b3d37d46659c63564/regex-2026.5.9-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ecd353045824e4477562a2ac718c25799cdaaa41f7aa925a806a8a3e6848a5b9", size = 796996, upload-time = "2026-05-09T23:14:14.923Z" },
+    { url = "https://files.pythonhosted.org/packages/2c/a6/9f992d00019166b9de01c546dd4549bc679f2a68df11b877740b0760b7c2/regex-2026.5.9-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:65c8c8c37377794bd5b2f3ebe51919042bf17aec802e23c833d89782ed0c78af", size = 866089, upload-time = "2026-05-09T23:14:17.757Z" },
+    { url = "https://files.pythonhosted.org/packages/e0/08/4d32af657e049b19cb62b02e46e38fe1518797bfb2203ee93a510b21b0dc/regex-2026.5.9-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5b73ab8afcf66c622db143d1c6fda4e58e4d537ee4f125229ad47b1ab80f34c0", size = 911530, upload-time = "2026-05-09T23:14:20.353Z" },
+    { url = "https://files.pythonhosted.org/packages/d9/27/2af43dd1dc201d1fecefda64a45f4ad0995855b92724f795a777b402ee69/regex-2026.5.9-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0de5cf193997384ed2ca6f1cd4f78055b255d93d82d5a8cd6ba0d11c10b167e4", size = 800643, upload-time = "2026-05-09T23:14:22.265Z" },
+    { url = "https://files.pythonhosted.org/packages/a4/dd/23a249047013b5321d4a60c4d2437462086f601b061776a525e5fba2a59f/regex-2026.5.9-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d641a8c9a61618047796d572a39a79b26167b0411d2c3031937b2fe2d081e2cf", size = 777223, upload-time = "2026-05-09T23:14:24.179Z" },
+    { url = "https://files.pythonhosted.org/packages/94/6a/e85ed9538cd19586d0465076a4578a12e093ce776d15f3f8ce92733a8dd6/regex-2026.5.9-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:24b2355ef5cc9aa5b8f07d17704face1c166fdcc2290fa7bd6e6c925655a8346", size = 785760, upload-time = "2026-05-09T23:14:26.065Z" },
+    { url = "https://files.pythonhosted.org/packages/2a/c4/f25473209438638e947c55f9156fd8f236f74169229028cc99116380868e/regex-2026.5.9-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:a24852d3c29ad9e47593593d8a247c44ccc3d0548ef12c822d6ed0810affe676", size = 860891, upload-time = "2026-05-09T23:14:28.17Z" },
+    { url = "https://files.pythonhosted.org/packages/f9/f7/f4f86e3c74419c37370e91f150ae0c2ef7d34b2e0e4cdd5da046a02e4022/regex-2026.5.9-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:916714069da19329ef7de197dcbc77bb3104145c7c2c864dbfbe318f46b88b14", size = 765891, upload-time = "2026-05-09T23:14:30.06Z" },
+    { url = "https://files.pythonhosted.org/packages/26/70/704d8e13765939146b1cd0ef4e2feb71d7929727d2290f026eed10095955/regex-2026.5.9-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:fa411799ca8da32a8d38d020a88faa5b6f91657d284761352940ecf9f7c3bbdd", size = 851380, upload-time = "2026-05-09T23:14:32.123Z" },
+    { url = "https://files.pythonhosted.org/packages/26/29/1a13582a8460038edc38e49f64ceb0dd7c60f5caba77571f4bf6601965d9/regex-2026.5.9-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:1e6da47d679b7010ef27556b6e0f99771b744936db1792a10ceac6547ae1503e", size = 789350, upload-time = "2026-05-09T23:14:34.799Z" },
+    { url = "https://files.pythonhosted.org/packages/73/56/3dcafe34fc72e271d62ad9a291801e88a1457bb251c132f15fcc2e5aad1a/regex-2026.5.9-cp314-cp314-win32.whl", hash = "sha256:98bd73080e8756255137e1bd3f3f00295bbc5aa383c0e0f973920e9134d7c4ad", size = 272130, upload-time = "2026-05-09T23:14:36.729Z" },
+    { url = "https://files.pythonhosted.org/packages/d0/9c/02eebf0be95efe416c664db7fb8b6b05b7a0b06a7544f2884f2558b0526f/regex-2026.5.9-cp314-cp314-win_amd64.whl", hash = "sha256:ff8d372ac2acdc048d1c19916f27ee61bc5722728458ba6ca5052f2c72d51763", size = 280999, upload-time = "2026-05-09T23:14:39.126Z" },
+    { url = "https://files.pythonhosted.org/packages/70/5a/1dd1abee76cb7a846a0bcf42fdc87e5720c3c33c24f3e37814310a513d9f/regex-2026.5.9-cp314-cp314-win_arm64.whl", hash = "sha256:e1d93bf647916292e8edcec150c07ddf3dc50179ccaf770c04a7f9e452155372", size = 273500, upload-time = "2026-05-09T23:14:41.059Z" },
+    { url = "https://files.pythonhosted.org/packages/86/c1/c5f619b0057a7965cb78ec559c1d7a45ce8c99a35bea95483d64959a93d9/regex-2026.5.9-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:83d0ee4a57d1c87cb549e195ec300b8f0ec3a82eba66d835e4e2ed8634fe4499", size = 494269, upload-time = "2026-05-09T23:14:42.869Z" },
+    { url = "https://files.pythonhosted.org/packages/05/2c/5d01f1aee33de4bbe60c8452945bfc8477ca7c5ae4450f6bfe711036cb36/regex-2026.5.9-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:d3d7eb5c9a7f6df82ed3cfac9beb93882a5cbcb5b8b157b56cb2b3b276574ac1", size = 293954, upload-time = "2026-05-09T23:14:44.822Z" },
+    { url = "https://files.pythonhosted.org/packages/7a/fe/e8988b2ae2108c6ef71bd4aa8d87fbe257976dd0810e826cd75f701c68b6/regex-2026.5.9-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:075160bf16658e16d35233300b8453aac25de4cbea808d22348b6979668e924d", size = 292405, upload-time = "2026-05-09T23:14:47.211Z" },
+    { url = "https://files.pythonhosted.org/packages/79/34/d2b0937faa7859263f7f0a3c6b103a1296306be6952dc173d0154e9a2f49/regex-2026.5.9-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:45375819235558a4ff1c4971dc32881f022613abdb180128f5cb4768c1765a1c", size = 811855, upload-time = "2026-05-09T23:14:49.21Z" },
+    { url = "https://files.pythonhosted.org/packages/80/fe/daf53a47457a8486db66c66c01ceb9c2303eecee3f87197f1e77eb1a736d/regex-2026.5.9-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ead4b163ac30a29574510cd4b3e2e985ac5290c05fc7095557d6a5f403fc31b5", size = 871189, upload-time = "2026-05-09T23:14:51.555Z" },
+    { url = "https://files.pythonhosted.org/packages/1c/75/058fc4470cbfbf57d800aff1a0022b929a3f9fa553ee10a0cdf2070eb31f/regex-2026.5.9-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8c6e4218fbdfbcd4f6c19efca40930d24a621bf4b48cb76bc6640543bd28ef20", size = 917485, upload-time = "2026-05-09T23:14:53.633Z" },
+    { url = "https://files.pythonhosted.org/packages/88/e7/179cfda3a28bc843b5c6cfe7f79f23489c791ed95f151083803660878432/regex-2026.5.9-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6351571c8a42b505eb555c0dc47d740d0fb66977dc142919eea6f4325b7c56a0", size = 816369, upload-time = "2026-05-09T23:14:56.198Z" },
+    { url = "https://files.pythonhosted.org/packages/41/90/6f0cc422071688266d344fca8462d787cba0a2c144acb25721f9a61ec265/regex-2026.5.9-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:002205cafd2a9e78c6290c7d1df277bf3277b3b7a30e0b4bb0dac2e2e3f7cb2d", size = 785869, upload-time = "2026-05-09T23:14:58.602Z" },
+    { url = "https://files.pythonhosted.org/packages/02/67/a31f1760f09c27b251ef39e9beb541f462cf977381d067faa764c2c0e393/regex-2026.5.9-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:8abd33fef90b2a9efac5557d6033ca82d1195ed3a15fea5af15ba7b463c6a63b", size = 801427, upload-time = "2026-05-09T23:15:00.642Z" },
+    { url = "https://files.pythonhosted.org/packages/e3/c4/1a80654597b6bc1e1ea0494824c31200e8a956abe290afae9b19a166a148/regex-2026.5.9-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:31037c82eccb44b7ea2e9e221d7c01429430e989a1f4b91ea5a855f6017b509a", size = 866482, upload-time = "2026-05-09T23:15:03.384Z" },
+    { url = "https://files.pythonhosted.org/packages/d1/11/960724e06482c08466ff5611e242e86f80062949cdf6b4b9cc317b9dd93d/regex-2026.5.9-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:5604dfd046dc37eca90250fc3be938b076c8059fa772ac0ed6f499b0f0fb0415", size = 773022, upload-time = "2026-05-09T23:15:05.625Z" },
+    { url = "https://files.pythonhosted.org/packages/50/a8/a9979c3e7918280e93159ebcab5ef1a65116dd4f3bd6091be0eae4a126e8/regex-2026.5.9-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:0e1b1b4e496afbb24f4a62aba855ee4f88f25578927697b340702e48c9ee6bc2", size = 856642, upload-time = "2026-05-09T23:15:07.966Z" },
+    { url = "https://files.pythonhosted.org/packages/fe/d4/a9b732f2f0072c0ab12227483abb24fffcb9f73f8a2b203df0a6d0434735/regex-2026.5.9-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:be3372b9df6ddecff6486d37e19095a7b4973137caf5512407a89f4455361f41", size = 803552, upload-time = "2026-05-09T23:15:10.215Z" },
+    { url = "https://files.pythonhosted.org/packages/d5/fe/1b3113817447a1d4155e4ac76d2e072f42c0bcba2f43fa8a0e756ea2cd91/regex-2026.5.9-cp314-cp314t-win32.whl", hash = "sha256:3ddd90103f9e5c471c49c7852ecc1fe27c7e45eb99e977aefe7caa4e779f4f58", size = 275746, upload-time = "2026-05-09T23:15:12.609Z" },
+    { url = "https://files.pythonhosted.org/packages/92/73/93d42045302636c91f2e5ef588b65b84b01428f28ec77de256b1dfdfbe5c/regex-2026.5.9-cp314-cp314t-win_amd64.whl", hash = "sha256:ca518ed29c46eecba6010b15f1b9a479314d2de409536e71b6a13aa04e3b8a77", size = 285685, upload-time = "2026-05-09T23:15:15.086Z" },
+    { url = "https://files.pythonhosted.org/packages/da/80/35b4c33c804a165a7f55289afda3ea9e3eb6d15800341a2d66455c0f1f30/regex-2026.5.9-cp314-cp314t-win_arm64.whl", hash = "sha256:5e41809d2683fcde7d5a8c87a6567ba1fb1ce0de9f31bff578de00a4b2d76daa", size = 275713, upload-time = "2026-05-09T23:15:16.98Z" },
+]
+
 [[package]]
 name = "requests"
 version = "2.34.2"
@@ -3878,6 +4247,67 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/32/d5/f9a850d79b0851d1d4ef6456097579a9005b31fea68726a4ae5f2d82ddd9/threadpoolctl-3.6.0-py3-none-any.whl", hash = "sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb", size = 18638, upload-time = "2025-03-13T13:49:21.846Z" },
 ]
 
+[[package]]
+name = "tiktoken"
+version = "0.13.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "regex" },
+    { name = "requests" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/e4/e5/5f3cb2159769d0f4324c0e9e87f9de3c4b1cd45848a96b2eb3566ad5ca77/tiktoken-0.13.0.tar.gz", hash = "sha256:c9435714c3a84c2319499de9a300c0e604449dd0799ff246458b3bb6a7f433c1", size = 38986, upload-time = "2026-05-15T04:51:27.153Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/38/e3/03c90dadcf5b3f82b83cee9adee60ef666b329c654f58c066af44eae0287/tiktoken-0.13.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:47b1df8d73390a24f94980c75158cdd5c56d256f16d55f30cb49c230caba9ba4", size = 1036627, upload-time = "2026-05-15T04:50:11.229Z" },
+    { url = "https://files.pythonhosted.org/packages/5e/30/760463e5b2e8ad2bc229ae0a17ecb06727b6cbc094f08d8f65844315632e/tiktoken-0.13.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7d40c6c5aab171dcd6eb8455bc567bde404bb9def60cdb8c1299cc782b242bb9", size = 984699, upload-time = "2026-05-15T04:50:12.874Z" },
+    { url = "https://files.pythonhosted.org/packages/de/8a/8895f342a6b6aabd1a358e672f6f077b3ae51d0c63ca605d142db3bcd8ab/tiktoken-0.13.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:9b842981fa91accdffd48ff6408a977b7a91c3fbda55d353c3c68114d5c9d69e", size = 1118690, upload-time = "2026-05-15T04:50:14.234Z" },
+    { url = "https://files.pythonhosted.org/packages/51/e0/92557768fb0801f0d9dd9243cb9b6d342900b05e4b1006d4771f49ce233e/tiktoken-0.13.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:ed5a30027cb4d8c7ca8b273d4766f3db3cf58fad9e9f3b1a68a351ffb54873d5", size = 1138423, upload-time = "2026-05-15T04:50:15.668Z" },
+    { url = "https://files.pythonhosted.org/packages/8f/b9/a3d99feeedb032ffd09cd6652077f86bdee9a70dd0b990b2b272b445d4c3/tiktoken-0.13.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:7ab10f4a21c2999846940113f6dbd72e0fa06a24119feddd74cc47e85818e06d", size = 1185077, upload-time = "2026-05-15T04:50:17.19Z" },
+    { url = "https://files.pythonhosted.org/packages/cc/93/bab868277d475dc6d2aaacd34cdd239c282f4908dcc8702e0a3311a8e032/tiktoken-0.13.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:a2937ad042d49d50eac6e1ba07c5661d4bd3942a5b1e0c0d08475c4df83676e1", size = 1241702, upload-time = "2026-05-15T04:50:18.772Z" },
+    { url = "https://files.pythonhosted.org/packages/c3/16/27e9f7e0ed76e501cfefc9fb2112df4c7bf70ca96945b15ecb7615aac860/tiktoken-0.13.0-cp310-cp310-win_amd64.whl", hash = "sha256:44733b99bfd72b590cd0936b1c01b3b4dd73122db2d544bc1ceeb18a7678c910", size = 876565, upload-time = "2026-05-15T04:50:20.268Z" },
+    { url = "https://files.pythonhosted.org/packages/1a/4c/1bc81f4cd53e827c4ee67ca951b5935724716049452d8dfa09b8b82372bb/tiktoken-0.13.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:7bfe1849caa65d1e1d9871817170ec497bbb7984e182012e1bdce72f66608cdb", size = 1036353, upload-time = "2026-05-15T04:50:21.757Z" },
+    { url = "https://files.pythonhosted.org/packages/75/91/10b9c7076bc02c246c853201fdbbe300a4b8c5ed7b84c25f7403f4e32655/tiktoken-0.13.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:91c180fe255bd5a86d8316210d2833a1d4d33d026cd86a67812f4773743c8d26", size = 984644, upload-time = "2026-05-15T04:50:23.256Z" },
+    { url = "https://files.pythonhosted.org/packages/4e/e4/fceae98015fab47fcd49b8bd7f46145bcd187a47e0add1e5378ed67ef980/tiktoken-0.13.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:059c8ecf554eb5b41e6e054ba467b871b03277d267dee7244380aca4359747d4", size = 1119261, upload-time = "2026-05-15T04:50:24.348Z" },
+    { url = "https://files.pythonhosted.org/packages/f9/39/fe42ad00de01a8c4a49ad8649a2c8a316835a9cad5961b11d21eac0020a5/tiktoken-0.13.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:36217497eaffc158607a3b26f065300db2aefd43b115263f3b9688ce38146173", size = 1138253, upload-time = "2026-05-15T04:50:25.505Z" },
+    { url = "https://files.pythonhosted.org/packages/03/c4/ccee1ecccca107e9a16efcecdeeb964c325305038554d466ece65b42338f/tiktoken-0.13.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:303f7d91b4fce3baddbcde05c139091d4caa5026ac7214c1dc7ff7a71ee429ff", size = 1185747, upload-time = "2026-05-15T04:50:27.02Z" },
+    { url = "https://files.pythonhosted.org/packages/9d/03/cd0cba295522b91eb55c6b2704f1df895f8226cfe60ab10d4d51d0cc9e69/tiktoken-0.13.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:5d48843bee149630eb735a99e1f4a85b47308d21868ea63163f6e87768d3cfed", size = 1241265, upload-time = "2026-05-15T04:50:28.815Z" },
+    { url = "https://files.pythonhosted.org/packages/7e/25/a10efd564402d82c2ff50d12057353ace447aa8007deceaa48641f63d35c/tiktoken-0.13.0-cp311-cp311-win_amd64.whl", hash = "sha256:fc1c44cd37b43fc46bae593129164f4f281e82ea116b57a85aa81bda57eafc94", size = 876509, upload-time = "2026-05-15T04:50:30.026Z" },
+    { url = "https://files.pythonhosted.org/packages/85/8e/144bde4e01df66b34bb865557c7cd754ed08b036217ebd79c9db5e9048a9/tiktoken-0.13.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:32ac870a806cfb260a02d0cb70426aef02e038297f8ad50df5040bb5af360791", size = 1034888, upload-time = "2026-05-15T04:50:31.579Z" },
+    { url = "https://files.pythonhosted.org/packages/36/18/d4ac9d20956cdebca04841316660ed584c2fecdc2b81722a28bc7ad3b1e4/tiktoken-0.13.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4d9980f11429ed2d737c463bb1fb78cf330caa026adf002f714aced7849a687b", size = 982970, upload-time = "2026-05-15T04:50:32.961Z" },
+    { url = "https://files.pythonhosted.org/packages/74/ed/6bb8d05b9f731f749fee5c6f5ca63e981143c826a5985877330507bd13b7/tiktoken-0.13.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:3f277ebea5edd7b8bf03c6f9431e1d67d517530115572b2dc1d465326e8f88c7", size = 1115741, upload-time = "2026-05-15T04:50:34.475Z" },
+    { url = "https://files.pythonhosted.org/packages/34/de/2ca96b07a82d972b74fe4b46de055b79c904e45c7eab699354a0bfa697dc/tiktoken-0.13.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:a116178fa7e1b4065bff05214360373a65cac22f965be7b3f73d00a0dbfe7649", size = 1136523, upload-time = "2026-05-15T04:50:35.782Z" },
+    { url = "https://files.pythonhosted.org/packages/ee/dc/9dafec002c2d4424378563cf4cf5c7fb93631d2a55013c8b87554ee4012c/tiktoken-0.13.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2c397ddda233208345b01bd30f2fca79ff730e55731d0108a603f9bc57f6af3b", size = 1181954, upload-time = "2026-05-15T04:50:36.99Z" },
+    { url = "https://files.pythonhosted.org/packages/a1/d0/1f8578c45b2f24759b46f0b50d31878c63c73e6bf0f2227e10ec5c5408dc/tiktoken-0.13.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:95097e4f89b06403976e498abf61a0ee73a7497e73fb599cb211d8197a054d91", size = 1240069, upload-time = "2026-05-15T04:50:38.221Z" },
+    { url = "https://files.pythonhosted.org/packages/aa/90/28d7f154888610aa9237e541986beb62b479df29d193a5a0617dbb1514d0/tiktoken-0.13.0-cp312-cp312-win_amd64.whl", hash = "sha256:8f2d16e7a7c783ad81f36e457d046d1f1c8af70b22aec8a13238efe531977c41", size = 874748, upload-time = "2026-05-15T04:50:39.587Z" },
+    { url = "https://files.pythonhosted.org/packages/9c/83/b096c859c2a47c11731bf2f5885f4028b809dfe2396582883eed9cae372f/tiktoken-0.13.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5df5d1507bd245f1ccad4a074698240021239e455eb0bb4ced4e3d7181872154", size = 1034228, upload-time = "2026-05-15T04:50:40.988Z" },
+    { url = "https://files.pythonhosted.org/packages/53/61/c68e123b6d753e3fc2751e9b18e732c9d8bf1e1926762e736eee935d931c/tiktoken-0.13.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8fe806a50664e83a6ffd56cbd1e4f5dcc6cd32a3e7538f70dc38b1a271384545", size = 982978, upload-time = "2026-05-15T04:50:42.195Z" },
+    { url = "https://files.pythonhosted.org/packages/ef/8b/96cc178cc584e65d363134500f297790b06cd48cdeb1e8fcf7bbe60f4715/tiktoken-0.13.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:125bc05005e747f993a83dc67934249932d6e4209854452cd4c0b1d53fba3ba2", size = 1116355, upload-time = "2026-05-15T04:50:43.564Z" },
+    { url = "https://files.pythonhosted.org/packages/86/f5/bab735d2c72ea55404b295d02d092644eb5f7cc6205e34d35eb9abfb9ab2/tiktoken-0.13.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:5e6358911cab4adee6712da27d65573496a4f68cf8a2b5fca6a4ad10fc5748cf", size = 1135772, upload-time = "2026-05-15T04:50:44.782Z" },
+    { url = "https://files.pythonhosted.org/packages/4e/b9/6de04ebdf904edfaad87788011b3735087a0c9ea671b9027e1e4e965e8c8/tiktoken-0.13.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:975cbd78d085d75d26b59660e262736dcaed1e35f8f142cd6291025c01d25486", size = 1182415, upload-time = "2026-05-15T04:50:46.422Z" },
+    { url = "https://files.pythonhosted.org/packages/0d/9c/470a05f3b1caf038f44880e334d47ab674e0c80d514c66b375d14d5afa10/tiktoken-0.13.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:75ab9bc99fa020a4c283424590ecd7f3afd70c1c281cb3fa3192a6c3af9f9615", size = 1239879, upload-time = "2026-05-15T04:50:48.052Z" },
+    { url = "https://files.pythonhosted.org/packages/42/a6/c1936d16055436cb32e6c6128d68629622e00f4768562f55653752d34768/tiktoken-0.13.0-cp313-cp313-win_amd64.whl", hash = "sha256:6b1615f0ff71953d19729ceb18865429c185b0a23c5353f1bbca34a394bf60f7", size = 874829, upload-time = "2026-05-15T04:50:49.202Z" },
+    { url = "https://files.pythonhosted.org/packages/d6/07/acb5992c3772b5a36284f742cfb7a5895aa4471d1848ac31464ad50d7fdf/tiktoken-0.13.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:6eb4a5bfbc6426938026b1a334e898ac53541360d62d8c689870160cc80abd67", size = 1033600, upload-time = "2026-05-15T04:50:50.4Z" },
+    { url = "https://files.pythonhosted.org/packages/14/e9/742e9aec30f59b9f161f7ff7cd072e02ea836c9e1c0854a8076dfcd40d5c/tiktoken-0.13.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:43cee3e5400573b2046fbf092cc7a5bc30164f9e4c95ce20714da929df48737a", size = 982516, upload-time = "2026-05-15T04:50:52.03Z" },
+    { url = "https://files.pythonhosted.org/packages/72/74/ca1541b053e7648254d2e4b42a253e1bb4359f2c91a0a8d49228c794e1a0/tiktoken-0.13.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:7de52e3f566d19b3b11bd37eea552c6c305ad74081f736882bd44d148ed4c48d", size = 1115518, upload-time = "2026-05-15T04:50:53.543Z" },
+    { url = "https://files.pythonhosted.org/packages/46/e3/93825eaf5a4a504795b787e5d5dea07fbeb3dabf97aa7b450be8bde59c89/tiktoken-0.13.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:51384448aa508e4df84c0f7c1dc3211c7f7b8096325660ee5fc82f3e11b381ce", size = 1136867, upload-time = "2026-05-15T04:50:55.191Z" },
+    { url = "https://files.pythonhosted.org/packages/8c/46/002b68de6827091d5ae90b048f326e8aad8d953520950e5ce1508879414f/tiktoken-0.13.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:e28157350f7ebf35008dd8e9e0fdb621f976e4230c881099c85e8cf07eaa50e2", size = 1181826, upload-time = "2026-05-15T04:50:56.296Z" },
+    { url = "https://files.pythonhosted.org/packages/db/c6/d393e3185a276505182f7abd93fe714f3c444a2be9180798fa052347504e/tiktoken-0.13.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:165cf1820ea4a354985c2490a5205d4cc74661c934aca79dd0368232fff94e0f", size = 1239489, upload-time = "2026-05-15T04:50:57.918Z" },
+    { url = "https://files.pythonhosted.org/packages/b7/4d/bc07d1f1635d4897a202acc0ae11c2886eaa7325c359ba4741b47bf8e225/tiktoken-0.13.0-cp313-cp313t-win_amd64.whl", hash = "sha256:6c43a675ca14f6f2749ba7f12075d37456015a24b859f2517b9beb4ef30807ec", size = 873820, upload-time = "2026-05-15T04:50:59.528Z" },
+    { url = "https://files.pythonhosted.org/packages/8c/93/0dd6adca026a616c3a92974566b43381eea4b475ce1f36c062b8271a9ac5/tiktoken-0.13.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:eaaaef47c2406277181d2086484c317bf7fc433e2d5d03ff94f56b0dcec87471", size = 1034977, upload-time = "2026-05-15T04:51:00.957Z" },
+    { url = "https://files.pythonhosted.org/packages/d9/77/5ec6e6bc5b30bed6d93f7f2162d8f6b32437b3ba27cb527cfe004f6109c9/tiktoken-0.13.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:ca8b310bd93b3772cb1b7922d915446864860f562bdfe4825c63a0aed3fb28cd", size = 983635, upload-time = "2026-05-15T04:51:02.629Z" },
+    { url = "https://files.pythonhosted.org/packages/94/b0/c8ae9aff00d625c50659b4513e707a0462c4bf5d4d6cc1b802103225c02e/tiktoken-0.13.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:32e0c12305105002c047b3bb1070b0dd9a73b0cb3b2856a8972b810e7a4f5881", size = 1116036, upload-time = "2026-05-15T04:51:04.082Z" },
+    { url = "https://files.pythonhosted.org/packages/1b/ac/6a5dddd1d0a6018ecb389bd0353e6b4a515eb4d2286611bd0ace1937b9e1/tiktoken-0.13.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:5ba5fd62507a932d1241346179e3b39bc7bf7408f03c272652d93b3bedf5db24", size = 1135544, upload-time = "2026-05-15T04:51:05.229Z" },
+    { url = "https://files.pythonhosted.org/packages/f4/b8/585032b4384b2f7dcdaddcb52865c83a701a420d09e3c2b4a2be1c450c57/tiktoken-0.13.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:d108bc2d470fc53c8ecd24f2c0fd2b5f98c33e87cdb6aa2e9b8c5dced703d273", size = 1182217, upload-time = "2026-05-15T04:51:06.517Z" },
+    { url = "https://files.pythonhosted.org/packages/cd/b6/993ff1ded3958215fd341a847b8e5ffeb5de473f435296870d314fc91ac4/tiktoken-0.13.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:cb99cb5127449f58d0a2d5f5ccfb390d8dbdfd919c221246caaee29d8725ed51", size = 1239404, upload-time = "2026-05-15T04:51:07.843Z" },
+    { url = "https://files.pythonhosted.org/packages/dd/3d/fef7e06e3b33e7538db0ced734cf9fe23b6832d2ac4990c119c377aec55e/tiktoken-0.13.0-cp314-cp314-win_amd64.whl", hash = "sha256:115c4f26ffa11caac8b54eea35c2ad38c612c20a48d35dd15d70a02ac6f51f58", size = 918686, upload-time = "2026-05-15T04:51:08.925Z" },
+    { url = "https://files.pythonhosted.org/packages/c1/82/a7fc44582bc32ab00de988a2299bf77c077f59068b233109e34b7d6ca7e6/tiktoken-0.13.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:472527e9132952f2fbf77cd290658bacf003d4d5a3fabc18e5fbd407cbae4d9b", size = 1034454, upload-time = "2026-05-15T04:51:10.035Z" },
+    { url = "https://files.pythonhosted.org/packages/37/d0/24d8a890c14f432a05cea669c17bebeaa99f96a7c79523b590f564246411/tiktoken-0.13.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:4e2f67d27c9626cdd25fe33d9313c5cdb3d8d82da646b68d6eb8e7e9c20e6448", size = 982976, upload-time = "2026-05-15T04:51:11.23Z" },
+    { url = "https://files.pythonhosted.org/packages/49/b7/2ab43f62788a9266187a9bfc1d3af99ad83e5eaa25fbef168a69cd5ad14f/tiktoken-0.13.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:2b920b35805cd64585a37c3dc7ce65fba4d2d36016be01e1d7942482ca29093a", size = 1115526, upload-time = "2026-05-15T04:51:12.608Z" },
+    { url = "https://files.pythonhosted.org/packages/64/39/1494321ed323ce7a14d88e3cd6cb9058625977df1c6961ddc492bd10a9f3/tiktoken-0.13.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:493af3aa28a4aaf2e3d2600a2ee717252c9bf5ab38fff94eb5a02db5ab77e5ad", size = 1136466, upload-time = "2026-05-15T04:51:13.926Z" },
+    { url = "https://files.pythonhosted.org/packages/96/d9/dfd086aa2d918c563a140720e0ce296cada1634efd2783d5cf51e05f984e/tiktoken-0.13.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:6644c9c2b5cf3916f5a3641d7d12fdb3f006a7b3d9ff6acdaec44e29ab1ff91e", size = 1181863, upload-time = "2026-05-15T04:51:15.025Z" },
+    { url = "https://files.pythonhosted.org/packages/2f/68/a18b4f307086954fdae32714cb4f85562e34f9d34ab206e61f1816aa6018/tiktoken-0.13.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5cb65b60b9408563676d874a3a4ee573370066f0dc4e29d84e82e989c6517424", size = 1239218, upload-time = "2026-05-15T04:51:16.103Z" },
+    { url = "https://files.pythonhosted.org/packages/16/5b/f2aa703a4fc5d2dff73460a7d46cc2f3f44aa0f3dd8eeb20d2a0ecf68862/tiktoken-0.13.0-cp314-cp314t-win_amd64.whl", hash = "sha256:85b78cc3a2c3d48723ca751fa981f1fedccd54194ca0471b957364353a898b07", size = 918110, upload-time = "2026-05-15T04:51:17.237Z" },
+]
+
 [[package]]
 name = "tldextract"
 version = "5.3.1"
@@ -3956,6 +4386,27 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/c7/18/c86eb8e0202e32dd3df50d43d7ff9854f8e0603945ff398974c1d91ac1ef/tomli_w-1.2.0-py3-none-any.whl", hash = "sha256:188306098d013b691fcadc011abd66727d3c414c571bb01b1a174ba8c983cf90", size = 6675, upload-time = "2025-01-15T12:07:22.074Z" },
 ]
 
+[[package]]
+name = "tqdm"
+version = "4.68.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "colorama", marker = "sys_platform == 'win32'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/85/05/0d5260f1f1ca784f4a4a0def9cbe6affe587f5b4025328d446c3d67765f4/tqdm-4.68.2.tar.gz", hash = "sha256:89c230e8dbc67c7615c142487111222f878c77427ea09549960f62389e258add", size = 171923, upload-time = "2026-06-09T13:26:42.539Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/eb/75/1a0392bcc21c44dcdf87b3cf2d137e7829be2c083a1e38d44efca3d57a16/tqdm-4.68.2-py3-none-any.whl", hash = "sha256:d4240441fb5353290b87d6a85968c9decc131a99b8c7faa28269d829de669ede", size = 78578, upload-time = "2026-06-09T13:26:40.731Z" },
+]
+
+[[package]]
+name = "truststore"
+version = "0.10.4"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/53/a3/1585216310e344e8102c22482f6060c7a6ea0322b63e026372e6dcefcfd6/truststore-0.10.4.tar.gz", hash = "sha256:9d91bd436463ad5e4ee4aba766628dd6cd7010cf3e2461756b3303710eebc301", size = 26169, upload-time = "2025-08-12T18:49:02.73Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/19/97/56608b2249fe206a67cd573bc93cd9896e1efb9e98bce9c163bcdc704b88/truststore-0.10.4-py3-none-any.whl", hash = "sha256:adaeaecf1cbb5f4de3b1959b42d41f6fab57b2b1666adb59e89cb0b53361d981", size = 18660, upload-time = "2025-08-12T18:49:01.46Z" },
+]
+
 [[package]]
 name = "ty"
 version = "0.0.37"

From 490b6457eb7549b1115ca4b457de5391064c5b57 Mon Sep 17 00:00:00 2001
From: Max Bohomolov <moriturus7@gmail.com>
Date: Tue, 16 Jun 2026 19:41:14 +0000
Subject: [PATCH 2/3] add tests

---
 .../crawlers/_ai/_clean_html_distiller.py     |   7 +-
 src/crawlee/crawlers/_ai/_prompts.py          |   2 +-
 .../crawlers/_ai/_selector_extractor.py       |  11 +-
 .../crawlers/_ai/_skeleton_distiller.py       |   2 +-
 tests/unit/crawlers/_ai/test_ai_crawler.py    | 116 ++++++
 .../crawlers/_ai/test_clean_html_distiller.py | 135 +++++++
 .../crawlers/_ai/test_direct_extractor.py     | 161 ++++++++
 .../crawlers/_ai/test_selector_extractor.py   | 343 ++++++++++++++++++
 .../crawlers/_ai/test_skeleton_distiller.py   | 105 ++++++
 9 files changed, 872 insertions(+), 10 deletions(-)
 create mode 100644 tests/unit/crawlers/_ai/test_ai_crawler.py
 create mode 100644 tests/unit/crawlers/_ai/test_clean_html_distiller.py
 create mode 100644 tests/unit/crawlers/_ai/test_direct_extractor.py
 create mode 100644 tests/unit/crawlers/_ai/test_selector_extractor.py
 create mode 100644 tests/unit/crawlers/_ai/test_skeleton_distiller.py

diff --git a/src/crawlee/crawlers/_ai/_clean_html_distiller.py b/src/crawlee/crawlers/_ai/_clean_html_distiller.py
index 644a6c12e7..28c5c99dd0 100644
--- a/src/crawlee/crawlers/_ai/_clean_html_distiller.py
+++ b/src/crawlee/crawlers/_ai/_clean_html_distiller.py
@@ -4,7 +4,6 @@
 from typing import TYPE_CHECKING
 
 import lxml.html
-from lxml import etree  # ty: ignore[unresolved-import]
 from typing_extensions import override
 
 from crawlee._utils.docs import docs_group
@@ -196,7 +195,7 @@ def _filter_attributes(self, tree: HtmlElement) -> None:
                         else:
                             del elem.attrib[name]
                     elif len(value) > self._max_attr_len:
-                        elem.attrib[name] = value[: self._max_attr_len]
+                        elem.attrib[name] = f'{value[: self._max_attr_len]}{_TRUNCATION_MARKER}'
                 else:
                     del elem.attrib[name]
 
@@ -235,7 +234,7 @@ def _serialize(self, tree: HtmlElement) -> str:
         Args:
             tree: The lxml tree.
         """
-        return etree.tostring(tree, encoding='unicode', pretty_print=self._pretty)
+        return lxml.html.tostring(tree, encoding='unicode', pretty_print=self._pretty)
 
     def _enforce_max_size(
         self,
@@ -253,7 +252,7 @@ def _enforce_max_size(
             logger.warning(
                 f'{type(self).__name__} output exceeds max_size ({len(distilled_html)} > {self._max_size}). '
                 'The tail of the page is cut off and invisible to the LLM. '
-                'Raise `max_size`, `scope` the extraction, or set `max_json_len`.'
+                'Raise `max_size` or set `max_json_len`.'
             )
             return distilled_html[: self._max_size] + _TRUNCATION_MARKER
 
diff --git a/src/crawlee/crawlers/_ai/_prompts.py b/src/crawlee/crawlers/_ai/_prompts.py
index 9525bcba3a..7049eb40b6 100644
--- a/src/crawlee/crawlers/_ai/_prompts.py
+++ b/src/crawlee/crawlers/_ai/_prompts.py
@@ -1,5 +1,5 @@
 # Marker for truncated values in distillate documents and prompt instructions.
-_TRUNCATION_MARKER = '…'
+_TRUNCATION_MARKER = '[...]'
 
 # Default prompt instructions for direct extraction from HTML.
 _DIRECT_INSTRUCTIONS = (
diff --git a/src/crawlee/crawlers/_ai/_selector_extractor.py b/src/crawlee/crawlers/_ai/_selector_extractor.py
index 51668d83b1..43db946cce 100644
--- a/src/crawlee/crawlers/_ai/_selector_extractor.py
+++ b/src/crawlee/crawlers/_ai/_selector_extractor.py
@@ -7,7 +7,7 @@
 from collections import defaultdict
 from enum import Enum
 from logging import getLogger
-from typing import TYPE_CHECKING, Union, cast, get_args, get_origin
+from typing import TYPE_CHECKING, Literal, Union, cast, get_args, get_origin
 
 from cssselect import SelectorError
 from pydantic import BaseModel, Field, ValidationError
@@ -81,7 +81,7 @@ class _FieldKind(Enum):
     """A scalar value extracted by one leaf selector."""
 
     LIST_SCALAR = 'list_scalar'
-    """A list of scalars extracted by one leaf selector matching many nodes."""
+    """A list or set of scalars extracted by one leaf selector matching many nodes."""
 
     LIST_MODEL = 'list_model'
     """A list of items. Maps to a container selector plus relative sub-selectors."""
@@ -546,7 +546,7 @@ def _classify_field(self, annotation: Any) -> tuple[_FieldKind, type[BaseModel]
         annotation = self._unwrap_optional(annotation)
         origin = get_origin(annotation)
 
-        if origin is list:
+        if origin in (list, set):
             args = get_args(annotation)
             item = self._unwrap_optional(args[0]) if args else str
             # `list[A | B]` is ambiguous: a match can't be tied to a specific union member, so treat it as unsupported.
@@ -554,13 +554,16 @@ def _classify_field(self, annotation: Any) -> tuple[_FieldKind, type[BaseModel]
                 return _FieldKind.LIST_UNION, None
             if isinstance(item, type) and issubclass(item, BaseModel):
                 return _FieldKind.LIST_MODEL, item
-            if get_origin(item) is list:
+            if get_origin(item) in (list, set):
                 return _FieldKind.LIST_OF_LISTS, None
             return _FieldKind.LIST_SCALAR, None
         if isinstance(annotation, type) and issubclass(annotation, BaseModel):
             return _FieldKind.NESTED_MODEL, annotation
         if origin is dict:
             return _FieldKind.MAPPING, None
+        # A `Literal` constrains a value to a fixed set. It is extracted as a leaf string and validated by Pydantic.
+        if origin is Literal:
+            return _FieldKind.LEAF, None
         if origin is not None:
             return _FieldKind.UNSUPPORTED, None
         return _FieldKind.LEAF, None
diff --git a/src/crawlee/crawlers/_ai/_skeleton_distiller.py b/src/crawlee/crawlers/_ai/_skeleton_distiller.py
index f77b0bb11d..b913edd500 100644
--- a/src/crawlee/crawlers/_ai/_skeleton_distiller.py
+++ b/src/crawlee/crawlers/_ai/_skeleton_distiller.py
@@ -209,7 +209,7 @@ def _enforce_max_size(self, distilled_html: str, html: str) -> str:
             logger.warning(
                 f'Skeleton exceeds max_size even after tightening ({len(tighter_distilled_html)} > {self._max_size}). '
                 'The tail of the page is cut off and invisible to the LLM. '
-                'Raise `max_size`, `scope` the extraction, or set `max_json_len`.'
+                'Raise `max_size` or set `max_json_len`.'
             )
             return tighter_distilled_html[: self._max_size] + _TRUNCATION_MARKER
 
diff --git a/tests/unit/crawlers/_ai/test_ai_crawler.py b/tests/unit/crawlers/_ai/test_ai_crawler.py
new file mode 100644
index 0000000000..624e0c5e27
--- /dev/null
+++ b/tests/unit/crawlers/_ai/test_ai_crawler.py
@@ -0,0 +1,116 @@
+from __future__ import annotations
+
+import sys
+from typing import TYPE_CHECKING
+from unittest.mock import AsyncMock, patch
+
+import pytest
+from parsel import Selector
+from pydantic import BaseModel
+from pydantic_ai.models.test import TestModel
+
+from crawlee import Request
+from crawlee.crawlers import AiCrawler, AiCrawlingContext, AiDirectExtractor, ParselCrawlingContext
+from crawlee.crawlers._ai._types import AiUsageStats
+
+if TYPE_CHECKING:
+    from yarl import URL
+
+    from crawlee.http_clients._base import HttpClient
+
+
+class _Article(BaseModel):
+    title: str
+
+
+def test_requires_exactly_one_of_model_or_extractor() -> None:
+    with pytest.raises(ValueError, match='exactly one'):
+        AiCrawler()
+
+    with pytest.raises(ValueError, match='exactly one'):
+        AiCrawler(model=TestModel(), extractor=AiDirectExtractor(TestModel()))
+
+
+def test_default_extractor_is_direct() -> None:
+    assert isinstance(AiCrawler(model=TestModel()).extractor, AiDirectExtractor)
+
+
+def test_emits_experimental_warning() -> None:
+    with pytest.warns(UserWarning, match='experimental'):
+        AiCrawler(model=TestModel())
+
+
+def test_exposes_extractor_and_usage() -> None:
+    extractor = AiDirectExtractor(TestModel())
+    crawler = AiCrawler(extractor=extractor)
+
+    assert crawler.extractor is extractor
+    assert crawler.ai_usage is extractor.ai_usage
+
+
+async def test_context_extract(server_url: URL, http_client: HttpClient) -> None:
+    crawler = AiCrawler(model=TestModel(custom_output_args={'title': 'Hello'}), http_client=http_client)
+    extracted = AsyncMock()
+
+    @crawler.router.default_handler
+    async def request_handler(context: AiCrawlingContext) -> None:
+        await extracted(await context.extract(_Article))
+
+    await crawler.run([str(server_url / 'start_enqueue')])
+
+    extracted.assert_awaited_once_with(_Article(title='Hello'))
+    assert crawler.ai_usage.requests == 1
+
+
+async def test_crawling_context_type(server_url: URL, http_client: HttpClient) -> None:
+    crawler = AiCrawler(model=TestModel(custom_output_args={'title': 'Hello'}), http_client=http_client)
+    handler = AsyncMock()
+    crawler.router.default_handler(handler)
+
+    await crawler.run([str(server_url / 'start_enqueue')])
+
+    handler.assert_awaited_once()
+    context = handler.call_args.args[0]
+
+    # It extends the Parsel context, so the manual `selector` stays available next to the AI helpers.
+    assert isinstance(context, AiCrawlingContext)
+    assert isinstance(context, ParselCrawlingContext)
+    assert isinstance(context.selector, Selector)
+    assert isinstance(context.ai_usage, AiUsageStats)
+
+
+async def test_context_extractor_forwards_arguments(server_url: URL, http_client: HttpClient) -> None:
+    extractor = AiDirectExtractor(TestModel())
+    crawler = AiCrawler(extractor=extractor, http_client=http_client)
+    extract_mock = AsyncMock(return_value=_Article(title='test'))
+    seen_selector = AsyncMock()
+
+    @crawler.router.default_handler
+    async def request_handler(context: AiCrawlingContext) -> None:
+        await seen_selector(context.selector)
+        await context.extract(_Article)  # cache_tag defaults to the request label
+        await context.extract(_Article, cache_tag='explicit')  # an explicit tag overrides the default
+        await context.extract(_Article, scope='article', additional_instructions='hint')
+
+    with patch.object(extractor, 'extract', extract_mock):
+        await crawler.run([Request.from_url(str(server_url), label='detail')])
+
+    first_call, second_call, third_call = extract_mock.call_args_list
+
+    # The live parsed selector is handed over as the first positional argument, without a re-parse.
+    assert first_call.args[0] is seen_selector.call_args.args[0]
+    assert first_call.kwargs['cache_tag'] == 'detail'
+    assert second_call.kwargs['cache_tag'] == 'explicit'
+    assert third_call.kwargs['scope'] == 'article'
+    assert third_call.kwargs['additional_instructions'] == 'hint'
+
+
+def test_import_error_handled() -> None:
+    # The `ai` extra is optional, so importing the crawler without `pydantic_ai` installed must raise `ImportError`.
+    blocked = {name: None for name in sys.modules if name == 'pydantic_ai' or name.startswith('pydantic_ai.')}
+    with patch.dict('sys.modules', blocked):
+        for name in list(sys.modules):
+            if name.startswith('crawlee.crawlers._ai'):
+                sys.modules.pop(name, None)
+        with pytest.raises(ImportError):
+            from crawlee.crawlers._ai import AiCrawler  # noqa: F401 PLC0415
diff --git a/tests/unit/crawlers/_ai/test_clean_html_distiller.py b/tests/unit/crawlers/_ai/test_clean_html_distiller.py
new file mode 100644
index 0000000000..6ef15b3cd6
--- /dev/null
+++ b/tests/unit/crawlers/_ai/test_clean_html_distiller.py
@@ -0,0 +1,135 @@
+from __future__ import annotations
+
+import logging
+from typing import TYPE_CHECKING
+
+from crawlee.crawlers import AiCleanHtmlDistiller
+from crawlee.crawlers._ai._prompts import _CLEAN_HTML_PROMPT_NOTES, _TRUNCATION_MARKER
+
+if TYPE_CHECKING:
+    import pytest
+
+
+def test_empty_html_input() -> None:
+    distiller = AiCleanHtmlDistiller()
+    assert distiller.distill('') == ''
+    assert distiller.distill('   \n\t ') == ''
+
+
+def test_prompt_notes() -> None:
+    assert AiCleanHtmlDistiller().get_prompt_notes() == _CLEAN_HTML_PROMPT_NOTES
+    assert AiCleanHtmlDistiller(prompt_notes=None).get_prompt_notes() is None
+    assert AiCleanHtmlDistiller(prompt_notes='custom').get_prompt_notes() == 'custom'
+
+
+def test_keeps_text_and_semantic_attributes() -> None:
+    html = '<div class="card" itemprop="product" data-id="7"><a href="/p">Item</a></div>'
+    distilled_html = AiCleanHtmlDistiller().distill(html)
+
+    assert html == distilled_html
+
+
+def test_drops_non_semantic_attributes() -> None:
+    distilled_html = AiCleanHtmlDistiller().distill(
+        '<div style="color:red" onclick="hack()" tabindex="2"><p>Item</p></div>'
+    )
+
+    assert distilled_html == '<div><p>Item</p></div>'
+
+
+def test_drops_scripts_and_styles() -> None:
+    distilled_html = AiCleanHtmlDistiller().distill(
+        '<body><script>evil()</script><style>.a{color:red}</style><p>Item</p></body>'
+    )
+
+    assert distilled_html == '<div><p>Item</p></div>'
+
+
+def test_drops_noise_tags() -> None:
+    distilled_html = AiCleanHtmlDistiller().distill('<div><svg><path/></svg><noscript>x</noscript><p>Item</p></div>')
+
+    assert distilled_html == '<div><p>Item</p></div>'
+
+
+def test_saves_json_ld_script() -> None:
+    html = '<div><script type="application/ld+json">{"name": "Phone"}</script></div>'
+    distilled_html = AiCleanHtmlDistiller().distill(html)
+
+    assert distilled_html == html
+
+
+def test_drops_data_uri_attribute() -> None:
+    distilled_html = AiCleanHtmlDistiller().distill('<img src="data:image/png;base64,AAAABBBB" alt="logo">')
+
+    assert distilled_html == '<img alt="logo">'
+
+
+def test_limited_class_attribute() -> None:
+    distilled_html = AiCleanHtmlDistiller(max_classes=2).distill('<div class="a b c d e">x</div>')
+
+    assert distilled_html == '<div class="a b">x</div>'
+
+
+def test_drops_empty_class_attribute() -> None:
+    distilled_html = AiCleanHtmlDistiller().distill('<div class="   ">x</div>')
+
+    assert distilled_html == '<div>x</div>'
+
+
+def test_truncates_long_attribute_values() -> None:
+    distilled_html = AiCleanHtmlDistiller(max_attr_len=5).distill(f'<a href="{"x" * 50}">link</a>')
+
+    assert distilled_html == f'<a href="xxxxx{_TRUNCATION_MARKER}">link</a>'
+
+
+def test_truncates_json_payload() -> None:
+    distilled_html = AiCleanHtmlDistiller(max_json_len=5).distill(
+        '<div><script type="application/json">{"long": "value here"}</script></div>'
+    )
+
+    assert distilled_html == f'<div><script type="application/json">{{"lon{_TRUNCATION_MARKER}</script></div>'
+
+
+def test_keep_head_useful_tags() -> None:
+    html = (
+        '<html><head>'
+        '<title>Page</title>'
+        '<meta name="description" content="desc">'
+        '<meta charset="utf-8">'
+        '<link rel="stylesheet" href="/a.css">'
+        '<script type="application/ld+json">{"k": 1}</script>'
+        '</head><body><p>Body</p></body></html>'
+    )
+    distilled_html = AiCleanHtmlDistiller().distill(html)
+
+    assert distilled_html == (
+        '<html><head>'
+        '<title>Page</title>'
+        '<meta name="description" content="desc">'
+        '<script type="application/ld+json">{"k": 1}</script>'
+        '</head><body><p>Body</p></body></html>'
+    )
+
+
+def test_drops_head() -> None:
+    distilled_html = AiCleanHtmlDistiller(keep_head=False).distill(
+        '<html><head><title>Page</title></head><body><p>Body</p></body></html>'
+    )
+
+    assert distilled_html == '<html><body><p>Body</p></body></html>'
+
+
+def test_normalizes_whitespace() -> None:
+    distilled_html = AiCleanHtmlDistiller().distill('<p>a    b\n\n\tc</p>')
+
+    assert distilled_html == '<p>a b c</p>'
+
+
+def test_enforces_max_size(caplog: pytest.LogCaptureFixture) -> None:
+    html = f'<div>{"<p>x</p>" * 500}</div>'
+    with caplog.at_level(logging.WARNING, logger='crawlee.crawlers._ai._clean_html_distiller'):
+        out = AiCleanHtmlDistiller(max_size=50).distill(html)
+
+    assert len(out) == 50 + len(_TRUNCATION_MARKER)
+    assert out.endswith(_TRUNCATION_MARKER)
+    assert any('max_size' in record.message for record in caplog.records)
diff --git a/tests/unit/crawlers/_ai/test_direct_extractor.py b/tests/unit/crawlers/_ai/test_direct_extractor.py
new file mode 100644
index 0000000000..3fb2495b4b
--- /dev/null
+++ b/tests/unit/crawlers/_ai/test_direct_extractor.py
@@ -0,0 +1,161 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import pytest
+from parsel import Selector
+from pydantic import BaseModel
+from pydantic_ai import capture_run_messages
+from pydantic_ai.exceptions import UnexpectedModelBehavior
+from pydantic_ai.messages import ModelRequest, UserPromptPart
+from pydantic_ai.models.test import TestModel
+
+from crawlee.crawlers import AiDirectExtractor, BaseAiHtmlDistiller
+from crawlee.crawlers._ai._prompts import _DIRECT_INSTRUCTIONS
+
+if TYPE_CHECKING:
+    from pydantic_ai.messages import ModelMessage
+
+
+DEFAULT_OUTPUT_ARGS = {'name': 'Phone', 'price': '$9'}
+
+
+class _Product(BaseModel):
+    name: str
+    price: str | None = None
+
+
+class _MockDistiller(BaseAiHtmlDistiller):
+    """Distiller returning a fixed marker so the prompt content can be asserted."""
+
+    def distill(self, html: str) -> str:
+        _html = html
+        return 'MOCK-DISTILLED-HTML'
+
+    def get_prompt_notes(self) -> str | None:
+        return 'MOCK-NOTES'
+
+
+def _extract_model_input(messages: list[ModelMessage]) -> tuple[str, str]:
+    """Return the (user prompt, instructions) of the prompt request the model received.
+
+    A run produces several requests (the prompt, then a tool-return follow-up). Only the first carries the user
+    prompt, so it is the one to inspect.
+    """
+    request = next(
+        message
+        for message in messages
+        if isinstance(message, ModelRequest) and any(isinstance(part, UserPromptPart) for part in message.parts)
+    )
+    prompt = next(part.content for part in request.parts if isinstance(part, UserPromptPart))
+    return str(prompt), request.instructions or ''
+
+
+async def test_returns_validated_model() -> None:
+    extractor = AiDirectExtractor(TestModel(custom_output_args=DEFAULT_OUTPUT_ARGS))
+
+    result = await extractor.extract('<html></html>', _Product)
+
+    assert isinstance(result, _Product)
+    assert result.name == 'Phone'
+    assert result.price == '$9'
+
+
+async def test_counts_token_usage() -> None:
+    extractor = AiDirectExtractor(TestModel(custom_output_args=DEFAULT_OUTPUT_ARGS))
+
+    await extractor.extract('<html></html>', _Product)
+
+    assert extractor.ai_usage.requests == 1
+    assert extractor.ai_usage.input_tokens > 0
+    assert extractor.ai_usage.output_tokens > 0
+    assert extractor.ai_usage.total_tokens > 0
+
+
+async def test_accepts_selector_input() -> None:
+    extractor = AiDirectExtractor(TestModel(custom_output_args=DEFAULT_OUTPUT_ARGS))
+
+    html = '<html><body><div>UNIQUE-CONTENT</div></body></html>'
+    with capture_run_messages() as messages:
+        await extractor.extract(Selector(text=html), _Product)
+
+    prompt, _ = _extract_model_input(messages)
+
+    html_part = prompt.split('Document:')[1].strip()
+
+    assert html_part == html
+
+
+async def test_scope_subtree() -> None:
+    extractor = AiDirectExtractor(TestModel(custom_output_args=DEFAULT_OUTPUT_ARGS))
+
+    with capture_run_messages() as messages:
+        await extractor.extract(
+            '<div><article><h1>Phone</h1></article><footer>junk</footer></div>',
+            _Product,
+            scope='article',
+        )
+
+    prompt, _ = _extract_model_input(messages)
+
+    html_part = prompt.split('Document:')[1].strip()
+
+    assert html_part == '<article><h1>Phone</h1></article>'
+
+
+async def test_scope_raises() -> None:
+    extractor = AiDirectExtractor(TestModel(custom_output_args=DEFAULT_OUTPUT_ARGS))
+
+    with pytest.raises(ValueError, match='matched nothing'):
+        await extractor.extract('<div>x</div>', _Product, scope='.missing')
+
+
+async def test_input_prompt() -> None:
+    extractor = AiDirectExtractor(TestModel(custom_output_args=DEFAULT_OUTPUT_ARGS), distiller=_MockDistiller())
+
+    with capture_run_messages() as messages:
+        await extractor.extract('<html></html>', _Product)
+
+    prompt, _ = _extract_model_input(messages)
+
+    assert 'name, price' in prompt
+    assert 'MOCK-DISTILLED-HTML' in prompt
+
+
+async def test_instructions() -> None:
+    extractor = AiDirectExtractor(
+        TestModel(custom_output_args=DEFAULT_OUTPUT_ARGS),
+        distiller=_MockDistiller(),
+    )
+
+    with capture_run_messages() as messages:
+        await extractor.extract('<html></html>', _Product)
+
+    _, instructions = _extract_model_input(messages)
+
+    assert 'MOCK-NOTES' in instructions
+    assert _DIRECT_INSTRUCTIONS in instructions
+
+
+async def test_additional_instructions() -> None:
+    extractor = AiDirectExtractor(TestModel(custom_output_args=DEFAULT_OUTPUT_ARGS), distiller=_MockDistiller())
+
+    with capture_run_messages() as messages:
+        await extractor.extract('<h1>Phone</h1>', _Product, additional_instructions='PER-CALL-HINT')
+
+    _, instructions = _extract_model_input(messages)
+
+    # Both the base instructions and the per-call hint reach the model.
+    assert 'PER-CALL-HINT' in instructions
+    assert _DIRECT_INSTRUCTIONS in instructions
+
+
+async def test_raise_for_invalid_output() -> None:
+    # `name` is required, so output missing it fails validation on every retry until the run errors.
+    extractor = AiDirectExtractor(TestModel(custom_output_args={'price': '$9'}), retries=2)
+
+    with pytest.raises(UnexpectedModelBehavior):
+        await extractor.extract('<h1>x</h1>', _Product)
+
+    # The extractor's usage stats reflect the 3 failed attempts (1 initial + 2 retries).
+    assert extractor.ai_usage.requests == 3
diff --git a/tests/unit/crawlers/_ai/test_selector_extractor.py b/tests/unit/crawlers/_ai/test_selector_extractor.py
new file mode 100644
index 0000000000..d95c0f9210
--- /dev/null
+++ b/tests/unit/crawlers/_ai/test_selector_extractor.py
@@ -0,0 +1,343 @@
+from __future__ import annotations
+
+import asyncio
+from typing import TYPE_CHECKING, Literal
+
+import pytest
+from pydantic import BaseModel, create_model
+from pydantic_ai.exceptions import UnexpectedModelBehavior
+from pydantic_ai.messages import ModelResponse, ToolCallPart
+from pydantic_ai.models.function import FunctionModel
+from pydantic_ai.models.test import TestModel
+
+from crawlee.crawlers import AiDirectExtractor, AiSelectorExtractor, AiUsageStats
+
+if TYPE_CHECKING:
+    from typing import Any
+
+    from pydantic_ai.messages import ModelMessage
+    from pydantic_ai.models import Model
+    from pydantic_ai.models.function import AgentInfo
+
+    from crawlee.crawlers._ai._types import AiHtmlExtractor
+
+
+class _Item(BaseModel):
+    name: str
+
+
+class _Posts(BaseModel):
+    posts: list[_Item]
+
+
+class _Nested(BaseModel):
+    title: str
+    item: _Item | None = None
+
+
+class _Collections(BaseModel):
+    items: list[str]
+    unique: set[str]
+
+
+class _Status(BaseModel):
+    status: Literal['in_stock', 'sold_out']
+
+
+class _Mapping(BaseModel):
+    data: dict[str, str]
+
+
+NAME_HTML = '<div><span class="n">X</span></div>'
+LIST_HTML = '<ul><li class="r"><a class="t" href="/a">A</a></li><li class="r"><a class="t" href="/b">B</a></li></ul>'
+
+NAME_SELECTORS = {'selectors': {'name': {'selector': '.n::text'}}}
+POSTS_SELECTORS = {'selectors': {'posts': {'selector': 'li.r', 'fields': {'name': {'selector': '.t::text'}}}}}
+
+
+def _model(*plans: dict[str, Any]) -> FunctionModel:
+    """Build a model that returns the given selector maps in order, repeating the last for any further calls."""
+    state = {'index': 0}
+
+    def respond(messages: list[ModelMessage], info: AgentInfo) -> ModelResponse:
+        _messages = messages
+        plan = plans[min(state['index'], len(plans) - 1)]
+        state['index'] += 1
+        return ModelResponse(parts=[ToolCallPart(tool_name=info.output_tools[0].name, args=plan)])
+
+    return FunctionModel(respond)
+
+
+def _extractor(
+    model: str | Model | None = None,
+    *,
+    fallback: AiHtmlExtractor | None = None,
+    retries: int = 3,
+    max_variants: int = 5,
+) -> AiSelectorExtractor:
+    return AiSelectorExtractor(
+        model or TestModel(),
+        persistence=False,
+        fallback=fallback,
+        retries=retries,
+        max_variants=max_variants,
+    )
+
+
+@pytest.mark.parametrize(
+    ('annotation', 'match'),
+    [
+        pytest.param(dict[str, str], 'mapping', id='dict'),
+        pytest.param(list[int | str], 'list of a union', id='list-of-union'),
+        pytest.param(list[list[str]], 'list of lists', id='list-of-lists'),
+        pytest.param(tuple[int, ...], 'unsupported annotation', id='tuple'),
+        pytest.param(int | str, 'unsupported annotation', id='scalar-union'),
+        pytest.param(_Posts, 'deeper than one level', id='deep-nesting'),
+    ],
+)
+async def test_rejects_unsupported_schema(annotation: Any, match: str) -> None:
+    # The schema shape is checked before any model call, so an unsupported one raises with the reason.
+    schema = create_model('_Unsupported', field=(annotation, ...))
+
+    with pytest.raises(ValueError, match=match):
+        await _extractor().extract('<div></div>', schema)
+
+
+async def test_extracts_scalar() -> None:
+    result = await _extractor(_model(NAME_SELECTORS)).extract(NAME_HTML, _Item)
+
+    assert result == _Item(name='X')
+
+
+async def test_extracts_collection_of_scalars() -> None:
+    plan = {'selectors': {'items': {'selector': '.a::text'}, 'unique': {'selector': '.b::text'}}}
+    html = '<div><span class="a">x</span><span class="a">y</span><span class="b">p</span><span class="b">p</span></div>'
+
+    result = await _extractor(_model(plan)).extract(html, _Collections)
+
+    # The list keeps order and duplicates; the set is deduplicated.
+    assert result == _Collections(items=['x', 'y'], unique={'p'})
+
+
+async def test_extracts_list_of_items() -> None:
+    result = await _extractor(_model(POSTS_SELECTORS)).extract(LIST_HTML, _Posts)
+
+    assert result == _Posts(posts=[_Item(name='A'), _Item(name='B')])
+
+
+async def test_extracts_literal_field() -> None:
+    plan = {'selectors': {'status': {'selector': '.s::text'}}}
+
+    result = await _extractor(_model(plan)).extract('<div><span class="s">in_stock</span></div>', _Status)
+
+    assert result == _Status(status='in_stock')
+
+
+async def test_reuses_cached_plan() -> None:
+    extractor = _extractor(_model(NAME_SELECTORS))
+
+    assert await extractor.extract(NAME_HTML, _Item, cache_tag='test') == _Item(name='X')
+    assert await extractor.extract(NAME_HTML, _Item, cache_tag='test') == _Item(name='X')
+    # The second extract is served from the cache, so the model was consulted only once.
+    assert extractor.ai_usage.requests == 1
+
+
+async def test_concurrent_generate() -> None:
+    extractor = _extractor(_model(NAME_SELECTORS))
+
+    results = await asyncio.gather(
+        extractor.extract(NAME_HTML, _Item, cache_tag='test'),
+        extractor.extract(NAME_HTML, _Item, cache_tag='test'),
+    )
+
+    assert results == [_Item(name='X'), _Item(name='X')]
+    assert extractor.ai_usage.requests == 1
+
+
+async def test_cached_plan_for_optional_field() -> None:
+    plan = {
+        'selectors': {
+            'title': {'selector': 'h1::text'},
+            'item': {'selector': '.item', 'fields': {'name': {'selector': '.n::text'}}},
+        }
+    }
+    extractor = _extractor(_model(plan))
+
+    first_call = await extractor.extract(
+        '<div><h1>T</h1><div class="item"><span class="n">X</span></div></div>', _Nested, cache_tag='test'
+    )
+    # The second page has no item, but the cached plan is still valid and returns None for the optional field.
+    second_call = await extractor.extract('<div><h1>T2</h1></div>', _Nested, cache_tag='test')
+
+    assert first_call == _Nested(title='T', item=_Item(name='X'))
+    assert second_call == _Nested(title='T2', item=None)
+    assert extractor.ai_usage.requests == 1
+
+
+async def test_caches_different_tags() -> None:
+    extractor = _extractor(_model(NAME_SELECTORS))
+
+    await extractor.extract(NAME_HTML, _Item, cache_tag='a')
+    await extractor.extract(NAME_HTML, _Item, cache_tag='b')  # different bucket, generated again
+    await extractor.extract(NAME_HTML, _Item, cache_tag='a')  # first bucket still cached
+
+    # One generation per tag. A shared bucket would have served 'b' from cache, leaving the count at one.
+    assert extractor.ai_usage.requests == 2
+
+
+async def test_eviction_of_oldest_variant() -> None:
+    pages = {
+        'a': '<div><span class="a">A</span></div>',
+        'b': '<div><span class="b">B</span></div>',
+        'c': '<div><span class="c">C</span></div>',
+    }
+    extractor = _extractor(
+        _model(
+            {'selectors': {'name': {'selector': '.a::text'}}},
+            {'selectors': {'name': {'selector': '.b::text'}}},
+            {'selectors': {'name': {'selector': '.c::text'}}},  # 'c' generated and evicted 'a'
+            {'selectors': {'name': {'selector': '.a::text'}}},  # 'a' regenerated after eviction
+        ),
+        max_variants=2,
+    )
+
+    for key in ('a', 'b', 'c'):
+        await extractor.extract(pages[key], _Item, cache_tag='test')
+
+    # With max_variants=2 the 'a' plan was evicted, so extracting 'a' again generates a fourth time.
+    await extractor.extract(pages['a'], _Item, cache_tag='test')
+
+    assert extractor.ai_usage.requests == 4
+
+
+@pytest.mark.parametrize(
+    'invalid_plan',
+    [
+        pytest.param({'selectors': {}}, id='missing-field'),
+        pytest.param(
+            {'selectors': {'posts': {'selector': 'li.r::text', 'fields': {'name': {'selector': '.t::text'}}}}},
+            id='container-with-value-form',
+        ),
+        pytest.param(
+            {'selectors': {'posts': {'selector': 'li[', 'fields': {'name': {'selector': '.t::text'}}}}},
+            id='invalid-css',
+        ),
+        pytest.param(
+            {'selectors': {'posts': {'selector': 'li.r', 'fields': {'name': {'selector': '.t'}}}}},
+            id='sub-without-value-form',
+        ),
+        pytest.param(
+            {'selectors': {'posts': {'selector': 'li.r', 'fields': {'name': {'selector': '.absent::text'}}}}},
+            id='sub-matches-nothing',
+        ),
+    ],
+)
+async def test_retries_with_invalid_plan(invalid_plan: dict[str, Any]) -> None:
+    # Each plan trips a different validation guard. The model is asked to fix it, then returns a valid plan.
+    extractor = _extractor(_model(invalid_plan, POSTS_SELECTORS))
+
+    result = await extractor.extract(LIST_HTML, _Posts, cache_tag='test')
+
+    assert result == _Posts(posts=[_Item(name='A'), _Item(name='B')])
+    assert extractor.ai_usage.requests == 2  # the first plan failed validation, the second one succeeded
+
+
+async def test_retries_with_invalid_data() -> None:
+    # The selectors are well-formed and match, but the extracted value fails schema validation, so the plan is retried.
+    html = '<div><span class="bad">WRONG</span><span class="good">in_stock</span></div>'
+    extractor = _extractor(
+        _model(
+            {'selectors': {'status': {'selector': '.bad::text'}}},  # 'WRONG' is not a valid Literal value
+            {'selectors': {'status': {'selector': '.good::text'}}},
+        )
+    )
+
+    result = await extractor.extract(html, _Status, cache_tag='test')
+
+    assert result == _Status(status='in_stock')
+    assert extractor.ai_usage.requests == 2
+
+
+async def test_unsupported_schema_delegates_to_fallback() -> None:
+    fallback = AiDirectExtractor(TestModel(custom_output_args={'data': {'k': 'v'}}))
+
+    result = await _extractor(fallback=fallback).extract('<div></div>', _Mapping)
+
+    assert result == _Mapping(data={'k': 'v'})
+
+
+async def test_generation_failure_delegates_to_fallback() -> None:
+    # The selector matches nothing, so generation fails and the extractor degrades to the fallback.
+    bad_plan = {'selectors': {'name': {'selector': '.absent::text'}}}
+    fallback = AiDirectExtractor(TestModel(custom_output_args={'name': 'from-fallback'}))
+
+    result = await _extractor(_model(bad_plan), fallback=fallback, retries=0).extract(
+        NAME_HTML, _Item, cache_tag='test'
+    )
+
+    assert result == _Item(name='from-fallback')
+
+
+async def test_generation_failure_raises() -> None:
+    bad_plan = {'selectors': {'name': {'selector': '.absent::text'}}}
+
+    with pytest.raises(UnexpectedModelBehavior):
+        await _extractor(_model(bad_plan), retries=0).extract(NAME_HTML, _Item, cache_tag='test')
+
+
+async def test_scope_raises() -> None:
+    with pytest.raises(ValueError, match='matched nothing'):
+        await _extractor().extract('<div>x</div>', _Item, scope='.missing')
+
+
+async def test_fallback_shares_usage_accumulator() -> None:
+    fallback = AiDirectExtractor(TestModel())
+    extractor = _extractor(fallback=fallback)
+
+    assert fallback.ai_usage is extractor.ai_usage
+
+
+def test_set_ai_usage_reshares_with_fallback() -> None:
+    fallback = AiDirectExtractor(TestModel())
+    extractor = _extractor(fallback=fallback)
+    new_usage = AiUsageStats()
+
+    extractor.set_ai_usage(new_usage)
+
+    assert extractor.ai_usage is new_usage
+    assert fallback.ai_usage is new_usage
+
+
+async def test_active_state() -> None:
+    extractor = _extractor()
+
+    assert extractor.active is False
+    async with extractor:
+        assert extractor.active is True
+    assert extractor.active is False
+
+
+async def test_double_enter_raises() -> None:
+    extractor = _extractor()
+
+    async with extractor:
+        with pytest.raises(RuntimeError, match='already active'):
+            await extractor.__aenter__()
+
+
+async def test_exit_without_enter_raises() -> None:
+    extractor = _extractor()
+
+    with pytest.raises(RuntimeError, match='not active'):
+        await extractor.__aexit__(None, None, None)
+
+
+async def test_cache_persists_across_instances() -> None:
+    async with AiSelectorExtractor(_model(NAME_SELECTORS), kvs_cache_key='shared-cache') as first:
+        assert await first.extract(NAME_HTML, _Item, cache_tag='test') == _Item(name='X')
+        assert first.ai_usage.requests == 1
+
+    # A fresh instance loads the cache from the KeyValueStore and serves without calling the model.
+    async with AiSelectorExtractor(_model(NAME_SELECTORS), kvs_cache_key='shared-cache') as second:
+        assert await second.extract(NAME_HTML, _Item, cache_tag='test') == _Item(name='X')
+        assert second.ai_usage.requests == 0
diff --git a/tests/unit/crawlers/_ai/test_skeleton_distiller.py b/tests/unit/crawlers/_ai/test_skeleton_distiller.py
new file mode 100644
index 0000000000..30a2bc89c3
--- /dev/null
+++ b/tests/unit/crawlers/_ai/test_skeleton_distiller.py
@@ -0,0 +1,105 @@
+from __future__ import annotations
+
+import logging
+from typing import TYPE_CHECKING
+
+from crawlee.crawlers import AiSkeletonDistiller
+from crawlee.crawlers._ai._prompts import _SKELETON_PROMPT_NOTES, _TRUNCATION_MARKER
+
+if TYPE_CHECKING:
+    import pytest
+
+
+def test_default_prompt_notes() -> None:
+    assert AiSkeletonDistiller().get_prompt_notes() == _SKELETON_PROMPT_NOTES
+
+
+def test_truncates_long_text() -> None:
+    distilled_html = AiSkeletonDistiller(max_text_len=5).distill(f'<p>{"a" * 20}</p>')
+
+    assert distilled_html == f'<p>aaaaa{_TRUNCATION_MARKER}</p>'
+
+
+def test_keeps_short_text() -> None:
+    distilled_html = AiSkeletonDistiller(max_text_len=50).distill('<p>short text</p>')
+
+    assert distilled_html == '<p>short text</p>'
+
+
+def test_collapses_repeated_siblings() -> None:
+    items = ''.join(f'<li class="item">item {index}</li>' for index in range(10))
+    distilled_html = AiSkeletonDistiller(keep_siblings=3).distill(f'<ul>{items}</ul>')
+
+    assert distilled_html == (
+        '<ul>'
+        '<li class="item">item 0</li>'
+        '<li class="item">item 1</li>'
+        '<li class="item">item 2</li>'
+        '<!-- ...7 more <li.item> siblings omitted -->'
+        '</ul>'
+    )
+
+
+def test_does_not_collapse_siblings_with_different_identity_attrs() -> None:
+    # Same tag and class, but different identity attributes, not a repeating template.
+    spans = ''.join(f'<span name="field-{index}">v</span>' for index in range(4))
+    distilled_html = AiSkeletonDistiller(keep_siblings=2).distill(f'<div>{spans}</div>')
+
+    assert distilled_html == (
+        '<div>'
+        '<span name="field-0">v</span>'
+        '<span name="field-1">v</span>'
+        '<span name="field-2">v</span>'
+        '<span name="field-3">v</span>'
+        '</div>'
+    )
+
+
+def test_does_not_collapse_scripts() -> None:
+    scripts = '<script type="application/json">{"a":1}</script>' * 4
+    distilled_html = AiSkeletonDistiller(keep_siblings=2).distill(f'<div>{scripts}</div>')
+
+    assert distilled_html == (
+        '<div>'
+        '<script type="application/json">{"a":1}</script>'
+        '<script type="application/json">{"a":1}</script>'
+        '<script type="application/json">{"a":1}</script>'
+        '<script type="application/json">{"a":1}</script>'
+        '</div>'
+    )
+
+
+def test_does_not_collapse_layout_markers() -> None:
+    distilled_html = AiSkeletonDistiller(keep_siblings=2).distill(f'<div>{"<br>" * 5}</div>')
+
+    assert distilled_html == '<div><br><br><br><br><br></div>'
+
+
+def test_redistills_for_oversize_without_cutting(caplog: pytest.LogCaptureFixture) -> None:
+    text = 'a' * 50
+    html = f'<div><p name="a">{text}</p><p name="b">{text}</p><p name="c">{text}</p></div>'
+
+    with caplog.at_level(logging.WARNING, logger='crawlee.crawlers._ai._skeleton_distiller'):
+        distilled_html = AiSkeletonDistiller(max_text_len=20, max_size=120).distill(html)
+
+    # The first distillation produces a skeleton of 134 chars, but the limit is 120 chars.
+    # The second distillation uses more aggressive text truncation to 15 chars, so the result is 119 chars.
+    assert distilled_html == (
+        f'<div>'
+        f'<p name="a">aaaaaaaaaaaaaaa{_TRUNCATION_MARKER}</p>'
+        f'<p name="b">aaaaaaaaaaaaaaa{_TRUNCATION_MARKER}</p>'
+        f'<p name="c">aaaaaaaaaaaaaaa{_TRUNCATION_MARKER}</p>'
+        f'</div>'
+    )
+    assert not caplog.records
+
+
+def test_cutting_for_oversize(caplog: pytest.LogCaptureFixture) -> None:
+    text = 'a' * 50
+    html = f'<div><p name="a">{text}</p><p name="b">{text}</p><p name="c">{text}</p></div>'
+
+    with caplog.at_level(logging.WARNING, logger='crawlee.crawlers._ai._skeleton_distiller'):
+        distilled_html = AiSkeletonDistiller(max_text_len=20, max_size=32).distill(html)
+
+    assert distilled_html == f'<div><p name="a">aaaaaaaaaaaaaaa{_TRUNCATION_MARKER}'
+    assert any('max_size' in record.message for record in caplog.records)

From 7f496a686e0150595c89f5aeba4153a9d738b9e6 Mon Sep 17 00:00:00 2001
From: Max Bohomolov <moriturus7@gmail.com>
Date: Wed, 17 Jun 2026 18:51:00 +0000
Subject: [PATCH 3/3] add docs

---
 docs/guides/ai_crawler.mdx                    | 150 ++++++++++++++++++
 docs/guides/architecture_overview.mdx         |  11 +-
 .../additional_instructions_example.py        |  44 +++++
 .../code_examples/ai_crawler/basic_example.py |  41 +++++
 .../ai_crawler/custom_distiller_example.py    |  67 ++++++++
 .../ai_crawler/selector_extractor_example.py  |  56 +++++++
 .../ai_crawler/usage_limit_example.py         |  57 +++++++
 src/crawlee/crawlers/_ai/_ai_crawler.py       |  19 +--
 tests/unit/crawlers/_ai/test_ai_crawler.py    |  15 +-
 9 files changed, 445 insertions(+), 15 deletions(-)
 create mode 100644 docs/guides/ai_crawler.mdx
 create mode 100644 docs/guides/code_examples/ai_crawler/additional_instructions_example.py
 create mode 100644 docs/guides/code_examples/ai_crawler/basic_example.py
 create mode 100644 docs/guides/code_examples/ai_crawler/custom_distiller_example.py
 create mode 100644 docs/guides/code_examples/ai_crawler/selector_extractor_example.py
 create mode 100644 docs/guides/code_examples/ai_crawler/usage_limit_example.py

diff --git a/docs/guides/ai_crawler.mdx b/docs/guides/ai_crawler.mdx
new file mode 100644
index 0000000000..17d030fe89
--- /dev/null
+++ b/docs/guides/ai_crawler.mdx
@@ -0,0 +1,150 @@
+---
+id: ai-crawler
+title: AI crawler
+description: Learn how to use AiCrawler to extract structured data from HTML pages with an LLM.
+---
+
+import ApiLink from '@site/src/components/ApiLink';
+import CodeBlock from '@theme/CodeBlock';
+
+import BasicExample from '!!raw-loader!./code_examples/ai_crawler/basic_example.py';
+import AdditionalInstructionsExample from '!!raw-loader!./code_examples/ai_crawler/additional_instructions_example.py';
+import CustomDistillerExample from '!!raw-loader!./code_examples/ai_crawler/custom_distiller_example.py';
+import SelectorExtractorExample from '!!raw-loader!./code_examples/ai_crawler/selector_extractor_example.py';
+import UsageLimitExample from '!!raw-loader!./code_examples/ai_crawler/usage_limit_example.py';
+
+An <ApiLink to="class/AiCrawler">`AiCrawler`</ApiLink> extracts structured data from a page with an LLM. It fetches each page over plain HTTP and parses it with Parsel, then exposes an <ApiLink to="class/ExtractFunction">`extract`</ApiLink> helper: pass a Pydantic model and get a validated instance back. Instead of writing CSS selectors for every field, you describe the data with a schema and the model fills it in.
+
+The model layer is [Pydantic AI](https://ai.pydantic.dev/), so any provider it supports (OpenAI, Anthropic, Gemini, Ollama, ...) works through the `model` argument. The context is an <ApiLink to="class/AiCrawlingContext">`AiCrawlingContext`</ApiLink>, which extends the <ApiLink to="class/ParselCrawlingContext">`ParselCrawlingContext`</ApiLink>, so the manual <ApiLink to="class/ParselCrawlingContext#selector">`selector`</ApiLink> and <ApiLink to="class/EnqueueLinksFunction">`enqueue_links`</ApiLink> stay available next to <ApiLink to="class/ExtractFunction">`extract`</ApiLink>.
+
+:::caution Experimental
+
+<ApiLink to="class/AiCrawler">`AiCrawler`</ApiLink> is experimental. Its public API may change in future releases.
+
+:::
+
+## When to use AiCrawler
+
+Use <ApiLink to="class/AiCrawler">`AiCrawler`</ApiLink> when:
+
+- Selectors are unknown or brittle. The model reads the content, so it tolerates markup that varies or changes.
+- One schema spans many layouts. A single Pydantic model fits differently structured pages, with no per-page selectors.
+- You need a rapid prototype. You describe the data with a schema instead of writing selectors.
+
+For pages with a stable, known structure, a plain <ApiLink to="class/ParselCrawler">`ParselCrawler`</ApiLink> or <ApiLink to="class/BeautifulSoupCrawler">`BeautifulSoupCrawler`</ApiLink> is cheaper, since it runs no model calls.
+
+<ApiLink to="class/AiCrawler">`AiCrawler`</ApiLink> fetches pages over plain HTTP and does not render JavaScript. For pages that need a browser, or for complex multi-step interactions, use <ApiLink to="class/StagehandCrawler">`StagehandCrawler`</ApiLink>. See the [Stagehand crawler guide](./stagehand-crawler).
+
+## Installation
+
+<ApiLink to="class/AiCrawler">`AiCrawler`</ApiLink> requires the `ai` optional dependency group:
+
+```bash
+pip install 'crawlee[ai]'
+```
+
+or with uv:
+
+```bash
+uv add 'crawlee[ai]'
+```
+
+The `ai` extra installs the OpenAI integration by default. To use another provider, add the matching [pydantic-ai-slim](https://ai.pydantic.dev/install/#use-with-pydantic-ai-slim) extra. For example, for Anthropic:
+
+```bash
+pip install 'crawlee[ai]' 'pydantic-ai-slim[anthropic]'
+```
+
+## Basic usage
+
+Provide a `model` and call <ApiLink to="class/AiCrawlingContext#extract">`context.extract`</ApiLink> with a Pydantic model inside the handler. The example below extracts an article and pushes it to the dataset.
+
+<CodeBlock className="language-python">
+    {BasicExample}
+</CodeBlock>
+
+The `model` builds the crawler's default extractor, an <ApiLink to="class/AiDirectExtractor">`AiDirectExtractor`</ApiLink>. With neither `model` nor `extractor`, the default OpenAI model `openai:gpt-5.4-nano` is used, so the `OPENAI_API_KEY` environment variable must be set.
+
+The `model` argument accepts a provider-prefixed name or a Pydantic AI `Model` instance.
+
+```python
+# A provider-prefixed name reads credentials from the provider's environment variable (e.g. OPENAI_API_KEY).
+crawler = AiCrawler(model='openai:gpt-5.4-nano')
+
+# A Model instance takes credentials explicitly.
+from pydantic_ai.models.openai import OpenAIChatModel
+from pydantic_ai.providers.openai import OpenAIProvider
+
+model = OpenAIChatModel('gpt-5.4-nano', provider=OpenAIProvider(api_key='...'))
+crawler = AiCrawler(model=model)
+```
+
+## Extractors
+
+An extractor turns a page into your schema. Extractors implement different strategies for working with the LLM, and each one uses an <ApiLink to="class/AiHtmlDistiller">`AiHtmlDistiller`</ApiLink> to shape the model's input. Crawlee ships two.
+
+### AiDirectExtractor
+
+<ApiLink to="class/AiDirectExtractor">`AiDirectExtractor`</ApiLink> sends the distilled page to the model in one call. The schema is the model's output type. Pydantic AI validates the result; on a mismatch, it sends the error back to the model to fix, bounded by `retries`.
+
+It reads each page on its own, so extraction is accurate per page. It accepts schemas of any shape: nested models, lists, dictionaries, unions, and deep nesting. The cost is one model call per page, which scales poorly on a large site.
+
+Use `additional_instructions` to focus the model on the data you want:
+
+<CodeBlock className="language-python">
+    {AdditionalInstructionsExample}
+</CodeBlock>
+
+### AiSelectorExtractor
+
+<ApiLink to="class/AiSelectorExtractor">`AiSelectorExtractor`</ApiLink> asks the model for reusable CSS selectors on the first page of a route, caches them, and reuses them with no model call on later pages of the same layout, so it scales to large sites. When a page matches none of the cached selectors (a different markup variant), it generates and caches a new set, so one bucket can hold several variants. If selector generation fails, or the schema shape is unsupported, it degrades to the `fallback` extractor when one is set, and raises otherwise. Selectors are bucketed by `cache_tag`, which defaults to the request label, so each route keeps its own set. The cache is persisted to a <ApiLink to="class/KeyValueStore">`KeyValueStore`</ApiLink>, so a later run reuses selectors learned earlier.
+
+<CodeBlock className="language-python">
+    {SelectorExtractorExample}
+</CodeBlock>
+
+It supports schemas built from scalar fields, lists of scalars, lists of items, and a single nested item, one level deep. For shapes it cannot serve (such as a `dict` field), set a `fallback` or use <ApiLink to="class/AiDirectExtractor">`AiDirectExtractor`</ApiLink>.
+
+Both extractors share two more knobs. `retries` caps how many times the model may fix output that fails schema validation (default 1 for <ApiLink to="class/AiDirectExtractor">`AiDirectExtractor`</ApiLink>, 3 for <ApiLink to="class/AiSelectorExtractor">`AiSelectorExtractor`</ApiLink>). `instructions` replaces the base task instructions entirely.
+
+## Distillers
+
+A distiller reduces raw HTML to a compact representation the model reads cheaply. Each extractor uses one. Replace it with the extractor's `distiller` argument (the crawler itself has no `distiller` argument).
+
+<ApiLink to="class/AiDirectExtractor">`AiDirectExtractor`</ApiLink> defaults to an <ApiLink to="class/AiCleanHtmlDistiller">`AiCleanHtmlDistiller`</ApiLink>: cleaned, structure-preserving HTML that keeps the full page text. <ApiLink to="class/AiSelectorExtractor">`AiSelectorExtractor`</ApiLink> uses an <ApiLink to="class/AiSkeletonDistiller">`AiSkeletonDistiller`</ApiLink> internally to ask the model for selectors; you rarely set it yourself.
+
+### Custom distiller
+
+Subclass <ApiLink to="class/BaseAiHtmlDistiller">`BaseAiHtmlDistiller`</ApiLink> and implement <ApiLink to="class/BaseAiHtmlDistiller#distill">`distill`</ApiLink> to send a different representation. Set `prompt_notes` so the model knows the input format. The extractor appends the notes to its instructions.
+
+The example below converts the cleaned page to Markdown with [html-to-markdown](https://pypi.org/project/html-to-markdown/), an extra dependency:
+
+```bash
+pip install html-to-markdown
+```
+
+<CodeBlock className="language-python">
+    {CustomDistillerExample}
+</CodeBlock>
+
+## Extract options
+
+<ApiLink to="class/AiCrawlingContext#extract">`context.extract`</ApiLink> takes options alongside the schema:
+
+- `scope` - a CSS selector that restricts extraction to the first matching subtree (e.g. `main` or `article.post`). It saves tokens and keeps the model away from unrelated parts of the page.
+- `cache_tag` - the bucket for cached selectors. It defaults to the request label.
+- `additional_instructions` - extra instructions for this call, appended to the base instructions. With <ApiLink to="class/AiSelectorExtractor">`AiSelectorExtractor`</ApiLink> they steer the one-time selector generation, not each extraction, so use them to point the model at the right region.
+
+## Usage and cost
+
+Token usage accumulates over the whole crawl. Read the running total from <ApiLink to="class/AiCrawlingContext#ai_usage">`context.ai_usage`</ApiLink> inside a handler, or from <ApiLink to="class/AiCrawler#ai_usage">`crawler.ai_usage`</ApiLink> on the crawler. The accumulator is an <ApiLink to="class/AiUsageStats">`AiUsageStats`</ApiLink> with <ApiLink to="class/AiUsageStats#requests">`requests`</ApiLink>, <ApiLink to="class/AiUsageStats#input_tokens">`input_tokens`</ApiLink>, <ApiLink to="class/AiUsageStats#output_tokens">`output_tokens`</ApiLink>, and <ApiLink to="class/AiUsageStats#total_tokens">`total_tokens`</ApiLink>.
+
+To cap spend, pass `usage_limits` (a pydantic-ai `UsageLimits`) to an extractor. It applies to every model run, and <ApiLink to="class/ExtractFunction">`extract`</ApiLink> raises `UsageLimitExceeded` when a page needs more. The example below caps each extraction, logs and skips pages that exceed it, and stops the whole crawl once a token budget is spent.
+
+<CodeBlock className="language-python">
+    {UsageLimitExample}
+</CodeBlock>
+
+## Conclusion
+
+This guide introduced <ApiLink to="class/AiCrawler">`AiCrawler`</ApiLink> and its <ApiLink to="class/ExtractFunction">`extract`</ApiLink> helper, the <ApiLink to="class/AiDirectExtractor">`AiDirectExtractor`</ApiLink> and <ApiLink to="class/AiSelectorExtractor">`AiSelectorExtractor`</ApiLink> strategies, the built-in and custom distillers, the extract options, and how token usage and cost are tracked and capped. If you have questions or need assistance, feel free to reach out on our [GitHub](https://github.com/apify/crawlee-python) or join our [Discord community](https://discord.com/invite/jyEM2PRvMU). Happy scraping!
diff --git a/docs/guides/architecture_overview.mdx b/docs/guides/architecture_overview.mdx
index f9c4b764fb..f86f5041da 100644
--- a/docs/guides/architecture_overview.mdx
+++ b/docs/guides/architecture_overview.mdx
@@ -49,6 +49,8 @@ class ParselCrawler
 
 class BeautifulSoupCrawler
 
+class AiCrawler
+
 class PlaywrightCrawler
 
 class AdaptivePlaywrightCrawler
@@ -65,6 +67,7 @@ BasicCrawler --|> AdaptivePlaywrightCrawler
 AbstractHttpCrawler --|> HttpCrawler
 AbstractHttpCrawler --|> ParselCrawler
 AbstractHttpCrawler --|> BeautifulSoupCrawler
+AbstractHttpCrawler --|> AiCrawler
 PlaywrightCrawler --|> StagehandCrawler
 ```
 
@@ -72,11 +75,12 @@ PlaywrightCrawler --|> StagehandCrawler
 
 HTTP crawlers use HTTP clients to fetch pages and parse them with HTML parsing libraries. They are fast and efficient for sites that do not require JavaScript rendering. HTTP clients are Crawlee components that wrap around HTTP libraries like [httpx](https://www.python-httpx.org/), [curl-impersonate](https://github.com/lwthiker/curl-impersonate) or [impit](https://apify.github.io/impit) and handle HTTP communication for requests and responses. You can learn more about them in the [HTTP clients guide](./http-clients).
 
-HTTP crawlers inherit from <ApiLink to="class/AbstractHttpCrawler">`AbstractHttpCrawler`</ApiLink> and there are three crawlers that belong to this category:
+HTTP crawlers inherit from <ApiLink to="class/AbstractHttpCrawler">`AbstractHttpCrawler`</ApiLink> and there are four crawlers that belong to this category:
 
 - <ApiLink to="class/BeautifulSoupCrawler">`BeautifulSoupCrawler`</ApiLink> utilizes the [BeautifulSoup](https://www.crummy.com/software/BeautifulSoup/) HTML parser.
 - <ApiLink to="class/ParselCrawler">`ParselCrawler`</ApiLink> utilizes [Parsel](https://github.com/scrapy/parsel) for parsing HTML.
 - <ApiLink to="class/HttpCrawler">`HttpCrawler`</ApiLink> does not parse HTTP responses at all and is used when no content parsing is required.
+- <ApiLink to="class/AiCrawler">`AiCrawler`</ApiLink> parses HTML with Parsel and uses an LLM to extract structured data into a validated Pydantic model.
 
 You can learn more about HTTP crawlers in the [HTTP crawlers guide](./http-crawlers).
 
@@ -120,6 +124,8 @@ class ParselCrawlingContext
 
 class BeautifulSoupCrawlingContext
 
+class AiCrawlingContext
+
 class PlaywrightPreNavCrawlingContext
 
 class PlaywrightCrawlingContext
@@ -148,6 +154,8 @@ ParsedHttpCrawlingContext --|> ParselCrawlingContext
 
 ParsedHttpCrawlingContext --|> BeautifulSoupCrawlingContext
 
+ParselCrawlingContext --|> AiCrawlingContext
+
 BasicCrawlingContext --|> PlaywrightPreNavCrawlingContext
 
 PlaywrightPreNavCrawlingContext --|> PlaywrightCrawlingContext
@@ -168,6 +176,7 @@ They have a similar inheritance structure as the crawlers, with the base class b
 - <ApiLink to="class/ParsedHttpCrawlingContext">`ParsedHttpCrawlingContext`</ApiLink> for HTTP crawlers with parsed responses.
 - <ApiLink to="class/ParselCrawlingContext">`ParselCrawlingContext`</ApiLink> for HTTP crawlers that use [Parsel](https://github.com/scrapy/parsel) for parsing.
 - <ApiLink to="class/BeautifulSoupCrawlingContext">`BeautifulSoupCrawlingContext`</ApiLink> for HTTP crawlers that use [BeautifulSoup](https://www.crummy.com/software/BeautifulSoup/) for parsing.
+- <ApiLink to="class/AiCrawlingContext">`AiCrawlingContext`</ApiLink> for the AI crawler, extending the Parsel context with an `extract` helper.
 - <ApiLink to="class/PlaywrightPreNavCrawlingContext">`PlaywrightPreNavCrawlingContext`</ApiLink> for Playwright crawlers before the page is navigated.
 - <ApiLink to="class/PlaywrightCrawlingContext">`PlaywrightCrawlingContext`</ApiLink> for Playwright crawlers.
 - <ApiLink to="class/AdaptivePlaywrightPreNavCrawlingContext">`AdaptivePlaywrightPreNavCrawlingContext`</ApiLink> for Adaptive Playwright crawlers before the page is navigated.
diff --git a/docs/guides/code_examples/ai_crawler/additional_instructions_example.py b/docs/guides/code_examples/ai_crawler/additional_instructions_example.py
new file mode 100644
index 0000000000..aae0397da9
--- /dev/null
+++ b/docs/guides/code_examples/ai_crawler/additional_instructions_example.py
@@ -0,0 +1,44 @@
+import asyncio
+
+from pydantic import BaseModel
+from pydantic_ai.models.openai import OpenAIChatModel
+from pydantic_ai.providers.openai import OpenAIProvider
+
+from crawlee.crawlers import AiCrawler, AiCrawlingContext
+
+
+class Post(BaseModel):
+    """Model representing a single post."""
+
+    title: str
+    url: str
+
+
+class Posts(BaseModel):
+    """Model representing the extracted list of posts."""
+
+    posts: list[Post]
+
+
+async def main() -> None:
+    model = OpenAIChatModel(
+        'gpt-5.4-nano',
+        provider=OpenAIProvider(api_key='your-openai-api-key'),
+    )
+    crawler = AiCrawler(model=model, max_requests_per_crawl=5)
+
+    @crawler.router.default_handler
+    async def handler(context: AiCrawlingContext) -> None:
+        # The instruction narrows what the model returns from the page.
+        posts = await context.extract(
+            Posts,
+            additional_instructions='Extract only the top five posts on the page.',
+        )
+
+        await context.push_data(posts.model_dump())
+
+    await crawler.run(['https://news.ycombinator.com'])
+
+
+if __name__ == '__main__':
+    asyncio.run(main())
diff --git a/docs/guides/code_examples/ai_crawler/basic_example.py b/docs/guides/code_examples/ai_crawler/basic_example.py
new file mode 100644
index 0000000000..7cdd458ce7
--- /dev/null
+++ b/docs/guides/code_examples/ai_crawler/basic_example.py
@@ -0,0 +1,41 @@
+import asyncio
+
+from pydantic import BaseModel
+from pydantic_ai.models.openai import OpenAIChatModel
+from pydantic_ai.providers.openai import OpenAIProvider
+
+from crawlee.crawlers import AiCrawler, AiCrawlingContext
+
+
+class Article(BaseModel):
+    """Model representing the extracted data for an article."""
+
+    title: str
+    short_text: str
+
+
+async def main() -> None:
+    model = OpenAIChatModel(
+        'gpt-5.4-nano',
+        # Set the provider with the API key explicitly.
+        provider=OpenAIProvider(api_key='your-openai-api-key'),
+    )
+
+    crawler = AiCrawler(model=model, max_requests_per_crawl=5)
+
+    @crawler.router.default_handler
+    async def handler(context: AiCrawlingContext) -> None:
+        context.log.info(f'Processing {context.request.url} ...')
+
+        # Pass a Pydantic model and get a validated instance back.
+        article = await context.extract(Article)
+
+        await context.push_data(article.model_dump())
+
+        await context.enqueue_links()
+
+    await crawler.run(['https://crawlee.dev/'])
+
+
+if __name__ == '__main__':
+    asyncio.run(main())
diff --git a/docs/guides/code_examples/ai_crawler/custom_distiller_example.py b/docs/guides/code_examples/ai_crawler/custom_distiller_example.py
new file mode 100644
index 0000000000..fb1faca290
--- /dev/null
+++ b/docs/guides/code_examples/ai_crawler/custom_distiller_example.py
@@ -0,0 +1,67 @@
+import asyncio
+
+from html_to_markdown import convert
+from lxml_html_clean import Cleaner
+from pydantic import BaseModel
+from pydantic_ai.models.openai import OpenAIChatModel
+from pydantic_ai.providers.openai import OpenAIProvider
+
+from crawlee.crawlers import (
+    AiCrawler,
+    AiCrawlingContext,
+    AiDirectExtractor,
+    BaseAiHtmlDistiller,
+    get_basic_ai_cleaner,
+)
+
+# Notes appended to the model instructions so it knows the input format.
+MARKDOWN_PROMPT_NOTES = 'The document is Markdown converted from the HTML page.'
+
+
+class MarkdownDistiller(BaseAiHtmlDistiller):
+    """Distiller that cleans the page HTML and converts it to Markdown."""
+
+    def __init__(self, cleaner: Cleaner | None = None) -> None:
+        super().__init__(prompt_notes=MARKDOWN_PROMPT_NOTES)
+
+        # Strip scripts, styles, and other noise before the conversion.
+        self._cleaner = cleaner or get_basic_ai_cleaner()
+
+    def distill(self, html: str) -> str:
+        return convert(self._cleaner.clean_html(html)).content or ''
+
+
+class Article(BaseModel):
+    """Model representing the extracted data for an article."""
+
+    title: str
+    short_text: str
+
+
+async def main() -> None:
+    model = OpenAIChatModel(
+        'gpt-5.4-nano',
+        # Set the provider with the API key explicitly.
+        provider=OpenAIProvider(api_key='your-openai-api-key'),
+    )
+    crawler = AiCrawler(
+        # Use the custom distiller to convert the page to Markdown before extraction.
+        extractor=AiDirectExtractor(model=model, distiller=MarkdownDistiller()),
+        max_requests_per_crawl=5,
+    )
+
+    @crawler.router.default_handler
+    async def handler(context: AiCrawlingContext) -> None:
+        # Pass a Pydantic model and get a validated instance back.
+        article = await context.extract(Article)
+        await context.push_data(article.model_dump())
+
+        # Enqueue links as usual, the distillation and extraction don't affect
+        # the rest of the crawling logic.
+        await context.enqueue_links()
+
+    await crawler.run(['https://crawlee.dev/'])
+
+
+if __name__ == '__main__':
+    asyncio.run(main())
diff --git a/docs/guides/code_examples/ai_crawler/selector_extractor_example.py b/docs/guides/code_examples/ai_crawler/selector_extractor_example.py
new file mode 100644
index 0000000000..f6dd98eb39
--- /dev/null
+++ b/docs/guides/code_examples/ai_crawler/selector_extractor_example.py
@@ -0,0 +1,56 @@
+import asyncio
+
+from pydantic import BaseModel
+from pydantic_ai.models.openai import OpenAIChatModel
+from pydantic_ai.providers.openai import OpenAIProvider
+
+from crawlee import Glob
+from crawlee.crawlers import (
+    AiCrawler,
+    AiCrawlingContext,
+    AiDirectExtractor,
+    AiSelectorExtractor,
+)
+
+
+class Article(BaseModel):
+    """Model representing the extracted data for an article."""
+
+    title: str
+    main_text: str
+
+
+async def main() -> None:
+    model = OpenAIChatModel(
+        'gpt-5.4-nano',
+        provider=OpenAIProvider(api_key='your-openai-api-key'),
+    )
+    crawler = AiCrawler(
+        extractor=AiSelectorExtractor(
+            model=model,
+            # Used when selector generation fails or the schema is unsupported.
+            fallback=AiDirectExtractor(model=model),
+        ),
+        max_requests_per_crawl=10,
+    )
+
+    @crawler.router.default_handler
+    async def handler(context: AiCrawlingContext) -> None:
+        # Enqueue blog article pages; the article handler extracts the data.
+        await context.enqueue_links(
+            include=[Glob('https://crawlee.dev/blog/*')],
+            label='article',
+        )
+
+    @crawler.router.handler('article')
+    async def article_handler(context: AiCrawlingContext) -> None:
+        # The first page generates selectors; later pages reuse them with no LLM call.
+        article = await context.extract(Article)
+
+        await context.push_data(article.model_dump())
+
+    await crawler.run(['https://crawlee.dev/blog'])
+
+
+if __name__ == '__main__':
+    asyncio.run(main())
diff --git a/docs/guides/code_examples/ai_crawler/usage_limit_example.py b/docs/guides/code_examples/ai_crawler/usage_limit_example.py
new file mode 100644
index 0000000000..7b0985af2f
--- /dev/null
+++ b/docs/guides/code_examples/ai_crawler/usage_limit_example.py
@@ -0,0 +1,57 @@
+import asyncio
+
+from pydantic import BaseModel
+from pydantic_ai.exceptions import UsageLimitExceeded
+from pydantic_ai.models.openai import OpenAIChatModel
+from pydantic_ai.providers.openai import OpenAIProvider
+from pydantic_ai.usage import UsageLimits
+
+from crawlee.crawlers import AiCrawler, AiCrawlingContext, AiDirectExtractor
+
+# Stop the whole crawl once this many tokens have been spent.
+TOKEN_BUDGET = 50_000
+
+
+class Article(BaseModel):
+    """Model representing the extracted data for an article."""
+
+    title: str
+    short_text: str
+
+
+async def main() -> None:
+    model = OpenAIChatModel(
+        'gpt-5.4-nano',
+        provider=OpenAIProvider(api_key='your-openai-api-key'),
+    )
+    crawler = AiCrawler(
+        # Cap each extraction so an oversized page cannot use too many tokens.
+        extractor=AiDirectExtractor(
+            model=model,
+            usage_limits=UsageLimits(total_tokens_limit=10_000),
+        ),
+        max_requests_per_crawl=5,
+    )
+
+    @crawler.router.default_handler
+    async def handler(context: AiCrawlingContext) -> None:
+        # Stop the crawl once the cumulative token budget is exhausted.
+        if context.ai_usage.total_tokens >= TOKEN_BUDGET:
+            context.log.info('Token budget exhausted, stopping the crawler.')
+            crawler.stop()
+            return
+
+        try:
+            article = await context.extract(Article)
+        except UsageLimitExceeded:
+            # The page needs more tokens than the per-extraction limit allows.
+            context.log.warning(f'Content at {context.request.url} is too large.')
+            return
+
+        await context.push_data(article.model_dump())
+
+    await crawler.run(['https://crawlee.dev/'])
+
+
+if __name__ == '__main__':
+    asyncio.run(main())
diff --git a/src/crawlee/crawlers/_ai/_ai_crawler.py b/src/crawlee/crawlers/_ai/_ai_crawler.py
index 5c89d20e1f..c4a92ea56e 100644
--- a/src/crawlee/crawlers/_ai/_ai_crawler.py
+++ b/src/crawlee/crawlers/_ai/_ai_crawler.py
@@ -29,6 +29,9 @@
 
 logger = getLogger(__name__)
 
+# Model for the default extractor when neither `model` nor `extractor` is given.
+_DEFAULT_AI_MODEL = 'openai:gpt-5.4-nano'
+
 
 @docs_group('Crawlers')
 class AiCrawler(AbstractHttpCrawler[AiCrawlingContext, Selector, Selector]):
@@ -86,20 +89,18 @@ def __init__(
             model: The model used for extraction, given to the default extractor (`AiDirectExtractor`). A
                 provider-prefixed name (e.g. `'openai:gpt-5.4-nano'`) or a Pydantic AI `Model` instance. When given
                 as a string, the provider reads credentials from its environment variable (e.g. `OPENAI_API_KEY`).
-                Pass a `Model` instance to supply them explicitly. Provide exactly one of `model` or `extractor`.
+                Pass a `Model` instance to supply them explicitly. Defaults to `'openai:gpt-5.4-nano'` when neither
+                `model` nor `extractor` is given. Provide at most one of `model` or `extractor`.
             extractor: A pre-configured `AiHtmlExtractor`, for full control over the distiller, instructions,
                 caching, usage limits, and model fallback. Pass an `AiSelectorExtractor` here for cached-selector
-                extraction. Provide exactly one of `model` or `extractor`.
+                extraction. Provide at most one of `model` or `extractor`.
             kwargs: Additional keyword arguments to pass to the underlying `AbstractHttpCrawler`.
         """
-        if (model is None) == (extractor is None):
-            raise ValueError('Provide exactly one of `model` or `extractor`.')
-
-        if extractor is None and model is not None:
-            extractor = AiDirectExtractor(model)
+        if model is not None and extractor is not None:
+            raise ValueError('Provide at most one of `model` or `extractor`.')
 
-        if not extractor:
-            raise ValueError('Extractor initialization failed; check the provided model or extractor configuration.')
+        if extractor is None:
+            extractor = AiDirectExtractor(model if model is not None else _DEFAULT_AI_MODEL)
 
         # Call the notification only once.
         warnings.warn(
diff --git a/tests/unit/crawlers/_ai/test_ai_crawler.py b/tests/unit/crawlers/_ai/test_ai_crawler.py
index 624e0c5e27..ada696edc0 100644
--- a/tests/unit/crawlers/_ai/test_ai_crawler.py
+++ b/tests/unit/crawlers/_ai/test_ai_crawler.py
@@ -23,11 +23,8 @@ class _Article(BaseModel):
     title: str
 
 
-def test_requires_exactly_one_of_model_or_extractor() -> None:
-    with pytest.raises(ValueError, match='exactly one'):
-        AiCrawler()
-
-    with pytest.raises(ValueError, match='exactly one'):
+def test_rejects_model_and_extractor_together() -> None:
+    with pytest.raises(ValueError, match='at most one'):
         AiCrawler(model=TestModel(), extractor=AiDirectExtractor(TestModel()))
 
 
@@ -35,6 +32,14 @@ def test_default_extractor_is_direct() -> None:
     assert isinstance(AiCrawler(model=TestModel()).extractor, AiDirectExtractor)
 
 
+def test_default_model_when_none_given(monkeypatch: pytest.MonkeyPatch) -> None:
+    # The default model is an OpenAI one, whose client needs a key at construction.
+    monkeypatch.setenv('OPENAI_API_KEY', 'test-key')
+
+    # With neither model nor extractor, the crawler builds the default direct extractor.
+    assert isinstance(AiCrawler().extractor, AiDirectExtractor)
+
+
 def test_emits_experimental_warning() -> None:
     with pytest.warns(UserWarning, match='experimental'):
         AiCrawler(model=TestModel())