# Source code for fairseq2.models.hg.converter

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""
This module provides an API for converting state dicts and configurations of
fairseq2 models to their Hugging Face Transformer equivalents.
"""

from __future__ import annotations

from abc import ABC, abstractmethod
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Mapping, Sequence, cast, final

import huggingface_hub
import transformers
from torch import Tensor
from transformers import PretrainedConfig
from typing_extensions import override

from fairseq2.error import NotSupportedError
from fairseq2.models.family import HuggingFaceExporter
from fairseq2.runtime.dependency import get_dependency_resolver


@dataclass
class HuggingFaceConfig:
    """
    Describes the configuration of a Hugging Face Transformers model.

    Instances of this class are what :meth:`HuggingFaceConverter.to_hg_config`
    returns when a fairseq2 model configuration is converted to its Hugging
    Face equivalent.
    """

    data: Mapping[str, object]
    """
    The configuration values. Every key of this mapping has to match the name
    of an attribute of the actual Hugging Face Transformers configuration
    class.
    """

    kls_name: str
    """
    The name of the Hugging Face Transformers configuration class, e.g.
    ``Qwen3Config`` or ``LlamaConfig``.
    """

    arch: str | Sequence[str]
    """
    The model architecture, or architectures, as named in Hugging Face
    Transformers, e.g. ``Qwen3ForCausalLM`` or ``LlamaForCausalLM``.
    """
class HuggingFaceConverter(ABC):
    """
    Interface for turning the state dict and configuration of a fairseq2 model
    into their Hugging Face Transformers counterparts.

    A model author is expected to register the converter implementation with
    fairseq2 during library initialization, for example:

    .. code:: python

        from fairseq2.models.hg import HuggingFaceConverter
        from fairseq2.runtime.dependency import DependencyContainer, register_model_family

        class MyModelConverter(HuggingFaceConverter):
            ...

        def register_my_model(container: DependencyContainer) -> None:
            register_model_family(container, name="my_model_family", ...)

            container.register_type(
                HuggingFaceConverter, MyModelConverter, key="my_model_family",
            )
    """

    @abstractmethod
    def to_hg_config(self, config: object) -> HuggingFaceConfig:
        """
        Returns the Hugging Face Transformers equivalent of the given fairseq2
        model configuration.

        :raises TypeError: ``config`` is not of valid type. The expected type
            is the one registered as part of the :class:`ModelFamily`.
        """

    @abstractmethod
    def to_hg_state_dict(
        self, state_dict: dict[str, object], config: object
    ) -> dict[str, object]:
        """
        Returns the Hugging Face Transformers equivalent of the given fairseq2
        state dict.

        ``config`` is the fairseq2 model configuration; implementations can
        consult it to adjust the converted state dict when necessary.

        :raises TypeError: ``config`` is not of valid type. The expected type
            is the one registered as part of the :class:`ModelFamily`.
        """
# TODO: Remove in v0.9 @final class _LegacyHuggingFaceConverter(HuggingFaceConverter): def __init__(self, exporter: HuggingFaceExporter[Any]) -> None: self._exporter = exporter @override def to_hg_config(self, config: object) -> HuggingFaceConfig: raise NotSupportedError() @override def to_hg_state_dict( self, state_dict: dict[str, object], config: object ) -> dict[str, object]: raise NotSupportedError()
def get_hugging_face_converter(family_name: str) -> HuggingFaceConverter:
    """
    Looks up the :class:`HuggingFaceConverter` registered for the model family
    named ``family_name``.

    :raises NotSupportedError: No converter is registered for the model
        family, meaning it does not support Hugging Face conversion.
    """
    converter = get_dependency_resolver().maybe_resolve(
        HuggingFaceConverter, key=family_name
    )

    # Happy path first: a registered converter is handed back as-is.
    if converter is not None:
        return converter

    raise NotSupportedError(
        f"{family_name} model family does not support Hugging Face conversion."
    )
def save_hugging_face_model(
    save_dir: Path, state_dict: dict[str, object], config: HuggingFaceConfig
) -> None:
    """
    Saves the state dict and configuration of a Hugging Face Transformers model
    to the specified directory.

    All inputs are validated before anything is written, so an invalid
    ``config`` or ``state_dict`` does not leave a partially written export in
    ``save_dir``.

    :param save_dir: The directory to write the configuration and the weights
        to.
    :param state_dict: The Hugging Face state dict. Every value must be a
        :class:`~torch.Tensor`.
    :param config: The Hugging Face configuration to instantiate and save.

    :raises TypeError: ``config.kls_name`` does not correspond to the expected
        :class:`PretrainedConfig` subclass of the Hugging Face model.
    :raises TypeError: ``state_dict`` contains non-tensor values which is not
        supported in Safetensors format.
    :raises ValueError: A key in ``config`` does not have a corresponding
        attribute in Hugging Face model configuration class.
    :raises OSError: The state dict or configuration cannot be saved to the
        file system.
    """
    try:
        config_kls = getattr(transformers, config.kls_name)
    except AttributeError:
        raise TypeError(f"`transformers.{config.kls_name}` is not a type.") from None

    # `issubclass()` raises a generic "arg 1 must be a class" error for
    # functions, modules, and other non-class attributes of `transformers`;
    # report those with the same message as a missing attribute instead.
    if not isinstance(config_kls, type):
        raise TypeError(f"`transformers.{config.kls_name}` is not a type.")

    if not issubclass(config_kls, PretrainedConfig):
        raise TypeError(
            f"`transformers.{config.kls_name}` is expected to be a subclass of `{PretrainedConfig}`."
        )

    native_config = config_kls()

    for key, value in config.data.items():
        # Reject unknown keys rather than silently attaching stray attributes
        # to the native configuration object.
        if not hasattr(native_config, key):
            raise ValueError(
                f"`transformers.{config.kls_name}` does not have an attribute named {key}."
            )

        setattr(native_config, key, value)

    arch = config.arch

    # Hugging Face expects a list of architecture names; wrap a single name.
    native_config.architectures = [arch] if isinstance(arch, str) else arch

    # Validate the state dict *before* touching the file system so that a bad
    # entry cannot leave a configuration file without matching weights.
    for key, value in state_dict.items():
        if not isinstance(value, Tensor):
            raise TypeError(
                f"`state_dict[{key}]` must be of type `{Tensor}`, but is of type `{type(value)}` instead."
            )

    tensors = cast(dict[str, Tensor], state_dict)

    native_config.save_pretrained(save_dir)

    huggingface_hub.save_torch_state_dict(tensors, save_dir)