
vllm.renderers.mistral

logger module-attribute

logger = init_logger(__name__)

MistralRenderer

Bases: RendererLike

Source code in vllm/renderers/mistral.py
class MistralRenderer(RendererLike):
    @classmethod
    def from_config(
        cls,
        config: ModelConfig,
        tokenizer_kwargs: dict[str, Any],
    ) -> "RendererLike":
        return cls(config, tokenizer_kwargs)

    def __init__(
        self,
        config: ModelConfig,
        tokenizer_kwargs: dict[str, Any],
    ) -> None:
        super().__init__()

        self.config = config

        if config.skip_tokenizer_init:
            tokenizer = None
        else:
            tokenizer = cached_get_tokenizer(
                tokenizer_cls=MistralTokenizer,
                **tokenizer_kwargs,
            )

        self._tokenizer = tokenizer

        self._apply_chat_template_executor = ThreadPoolExecutor(max_workers=1)
        self._apply_chat_template_async = make_async(
            safe_apply_chat_template, executor=self._apply_chat_template_executor
        )
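
A minimal usage sketch, assuming a Mistral instruct model and that `tokenizer_kwargs` are forwarded to `cached_get_tokenizer`; the `ModelConfig` fields and the `tokenizer_name` key shown here are illustrative assumptions, not part of this module:

from vllm.config import ModelConfig
from vllm.renderers.mistral import MistralRenderer

config = ModelConfig(
    model="mistralai/Mistral-7B-Instruct-v0.3",
    tokenizer_mode="mistral",
)
renderer = MistralRenderer.from_config(
    config,
    tokenizer_kwargs={"tokenizer_name": config.model},  # illustrative keys
)
conversation, prompt = renderer.render_messages(
    [{"role": "user", "content": "Hello!"}]
)
# `prompt` is typically a TokensPrompt, since MistralTokenizer usually
# returns token ids from apply_chat_template.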

    @property
    def tokenizer(self) -> MistralTokenizer | None:
        return self._tokenizer

    def get_tokenizer(self) -> MistralTokenizer:
        tokenizer = self.tokenizer
        if tokenizer is None:
            raise ValueError("Tokenizer not available when `skip_tokenizer_init=True`")

        return tokenizer

    def render_messages(
        self,
        messages: list[ChatCompletionMessageParam],
        **kwargs,
    ) -> tuple[list[ConversationMessage], TextPrompt | TokensPrompt]:
        tokenizer = self.get_tokenizer()
        conversation, mm_data, mm_uuids = parse_chat_messages(
            messages,
            self.config,
            content_format="string",
        )

        prompt_raw = safe_apply_chat_template(tokenizer, messages, **kwargs)

        prompt = (
            TextPrompt(prompt=prompt_raw)
            if isinstance(prompt_raw, str)
            else TokensPrompt(prompt_token_ids=prompt_raw)
        )
        if mm_data is not None:
            prompt["multi_modal_data"] = mm_data
        if mm_uuids is not None:
            prompt["multi_modal_uuids"] = mm_uuids

        return conversation, prompt  # type: ignore[return-value]

    async def render_messages_async(
        self,
        messages: list[ChatCompletionMessageParam],
        **kwargs,
    ) -> tuple[list[ConversationMessage], TextPrompt | TokensPrompt]:
        tokenizer = self.get_tokenizer()
        conversation, mm_data, mm_uuids = await parse_chat_messages_async(
            messages,
            self.config,
            content_format="string",
        )

        prompt_raw = await self._apply_chat_template_async(
            tokenizer, messages, **kwargs
        )

        prompt = (
            TextPrompt(prompt=prompt_raw)
            if isinstance(prompt_raw, str)
            else TokensPrompt(prompt_token_ids=prompt_raw)
        )
        if mm_data is not None:
            prompt["multi_modal_data"] = mm_data
        if mm_uuids is not None:
            prompt["multi_modal_uuids"] = mm_uuids

        return conversation, prompt  # type: ignore[return-value]

_apply_chat_template_async instance-attribute

_apply_chat_template_async = make_async(
    safe_apply_chat_template,
    executor=_apply_chat_template_executor,
)

_apply_chat_template_executor instance-attribute

_apply_chat_template_executor = ThreadPoolExecutor(
    max_workers=1
)
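
`make_async` is vLLM's helper for offloading a blocking call to an executor; with `max_workers=1`, template application is serialized and never blocks the event loop. A standalone sketch of the same pattern (this `make_async` is illustrative, not the vLLM implementation):

import asyncio
import time
from concurrent.futures import ThreadPoolExecutor
from functools import partial

def make_async(fn, executor):
    # Run the blocking `fn` in `executor` so awaiting callers
    # do not block the asyncio event loop.
    async def _wrapper(*args, **kwargs):
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(executor, partial(fn, *args, **kwargs))
    return _wrapper

def blocking_template(text: str) -> str:
    time.sleep(0.1)  # stand-in for CPU-bound template rendering
    return text.upper()

executor = ThreadPoolExecutor(max_workers=1)  # one worker => calls serialized
render_async = make_async(blocking_template, executor=executor)

async def main() -> None:
    print(await render_async("hello"))  # HELLO

asyncio.run(main())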

_tokenizer instance-attribute

_tokenizer = tokenizer

config instance-attribute

config = config

tokenizer property

tokenizer: MistralTokenizer | None

__init__

__init__(
    config: ModelConfig, tokenizer_kwargs: dict[str, Any]
) -> None
Source code in vllm/renderers/mistral.py
def __init__(
    self,
    config: ModelConfig,
    tokenizer_kwargs: dict[str, Any],
) -> None:
    super().__init__()

    self.config = config

    if config.skip_tokenizer_init:
        tokenizer = None
    else:
        tokenizer = cached_get_tokenizer(
            tokenizer_cls=MistralTokenizer,
            **tokenizer_kwargs,
        )

    self._tokenizer = tokenizer

    self._apply_chat_template_executor = ThreadPoolExecutor(max_workers=1)
    self._apply_chat_template_async = make_async(
        safe_apply_chat_template, executor=self._apply_chat_template_executor
    )

from_config classmethod

from_config(
    config: ModelConfig, tokenizer_kwargs: dict[str, Any]
) -> RendererLike
Source code in vllm/renderers/mistral.py
@classmethod
def from_config(
    cls,
    config: ModelConfig,
    tokenizer_kwargs: dict[str, Any],
) -> "RendererLike":
    return cls(config, tokenizer_kwargs)

get_tokenizer

get_tokenizer() -> MistralTokenizer
Source code in vllm/renderers/mistral.py
def get_tokenizer(self) -> MistralTokenizer:
    tokenizer = self.tokenizer
    if tokenizer is None:
        raise ValueError("Tokenizer not available when `skip_tokenizer_init=True`")

    return tokenizer
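
When the renderer was built with `skip_tokenizer_init=True`, no tokenizer is held and `get_tokenizer` raises; a sketch, assuming such a renderer:

try:
    tokenizer = renderer.get_tokenizer()
except ValueError as e:
    print(e)  # Tokenizer not available when `skip_tokenizer_init=True`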

render_messages

render_messages(
    messages: list[ChatCompletionMessageParam], **kwargs
) -> tuple[
    list[ConversationMessage], TextPrompt | TokensPrompt
]
Source code in vllm/renderers/mistral.py
def render_messages(
    self,
    messages: list[ChatCompletionMessageParam],
    **kwargs,
) -> tuple[list[ConversationMessage], TextPrompt | TokensPrompt]:
    tokenizer = self.get_tokenizer()
    conversation, mm_data, mm_uuids = parse_chat_messages(
        messages,
        self.config,
        content_format="string",
    )

    prompt_raw = safe_apply_chat_template(tokenizer, messages, **kwargs)

    prompt = (
        TextPrompt(prompt=prompt_raw)
        if isinstance(prompt_raw, str)
        else TokensPrompt(prompt_token_ids=prompt_raw)
    )
    if mm_data is not None:
        prompt["multi_modal_data"] = mm_data
    if mm_uuids is not None:
        prompt["multi_modal_uuids"] = mm_uuids

    return conversation, prompt  # type: ignore[return-value]
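
A sketch of calling `render_messages` directly, continuing the construction sketch above; extra kwargs are forwarded to `MistralTokenizer.apply_chat_template`:

messages = [
    {"role": "system", "content": "You are concise."},
    {"role": "user", "content": "Summarize vLLM in one line."},
]
conversation, prompt = renderer.render_messages(messages)
# `prompt` is a TokensPrompt when the tokenizer returns token ids, or a
# TextPrompt when it returns a string; any multimodal data is attached
# under "multi_modal_data" / "multi_modal_uuids".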

render_messages_async async

render_messages_async(
    messages: list[ChatCompletionMessageParam], **kwargs
) -> tuple[
    list[ConversationMessage], TextPrompt | TokensPrompt
]
Source code in vllm/renderers/mistral.py
async def render_messages_async(
    self,
    messages: list[ChatCompletionMessageParam],
    **kwargs,
) -> tuple[list[ConversationMessage], TextPrompt | TokensPrompt]:
    tokenizer = self.get_tokenizer()
    conversation, mm_data, mm_uuids = await parse_chat_messages_async(
        messages,
        self.config,
        content_format="string",
    )

    prompt_raw = await self._apply_chat_template_async(
        tokenizer, messages, **kwargs
    )

    prompt = (
        TextPrompt(prompt=prompt_raw)
        if isinstance(prompt_raw, str)
        else TokensPrompt(prompt_token_ids=prompt_raw)
    )
    if mm_data is not None:
        prompt["multi_modal_data"] = mm_data
    if mm_uuids is not None:
        prompt["multi_modal_uuids"] = mm_uuids

    return conversation, prompt  # type: ignore[return-value]
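
The async variant parses messages and applies the chat template on the dedicated executor, so the event loop is never blocked; a sketch:

import asyncio

async def main() -> None:
    conversation, prompt = await renderer.render_messages_async(
        [{"role": "user", "content": "Hello!"}]
    )
    print(len(conversation), list(prompt))  # prompts are TypedDicts

asyncio.run(main())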

safe_apply_chat_template

safe_apply_chat_template(
    tokenizer: MistralTokenizer,
    messages: list[ChatCompletionMessageParam],
    **kwargs,
) -> str | list[int]
Source code in vllm/renderers/mistral.py
def safe_apply_chat_template(
    tokenizer: MistralTokenizer,
    messages: list[ChatCompletionMessageParam],
    **kwargs,
) -> str | list[int]:
    from mistral_common.exceptions import MistralCommonException

    try:
        return tokenizer.apply_chat_template(messages, **kwargs)
    # mistral-common uses assert statements to stop processing input
    # that does not comply with the expected format.
    # We convert those assertion errors to ValueErrors so they can be
    # properly caught in the preprocessing_input step
    except (AssertionError, MistralCommonException) as e:
        raise ValueError(str(e)) from e

    # Exceptions from the external library can still occur despite its
    # internal exception handling.
    except Exception as e:
        # Log and report any library-related exceptions for further
        # investigation.
        logger.exception(
            "An error occurred in `mistral_common` while applying chat template"
        )
        raise ValueError(str(e)) from e
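
A sketch of the error conversion: input that mistral-common rejects (via AssertionError or MistralCommonException) surfaces to the caller as a ValueError. Whether this particular message is rejected depends on mistral-common; it is shown as an illustrative malformed input:

from vllm.renderers.mistral import safe_apply_chat_template

try:
    # illustrative malformed message: "content" is missing
    safe_apply_chat_template(tokenizer, [{"role": "user"}])
except ValueError as e:
    print(f"rejected: {e}")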