# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
"""
This module provides an API for converting state dicts and configurations of
fairseq2 models to their Hugging Face Transformer equivalents.
"""
from __future__ import annotations
from abc import ABC, abstractmethod
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Mapping, Sequence, cast, final
import huggingface_hub
import transformers
from torch import Tensor
from transformers import PretrainedConfig
from typing_extensions import override
from fairseq2.error import NotSupportedError
from fairseq2.models.family import HuggingFaceExporter
from fairseq2.runtime.dependency import get_dependency_resolver
@dataclass
class HuggingFaceConfig:
    """
    Represents the configuration of a Hugging Face Transformers model.

    This class is part of the :class:`HuggingFaceConverter` interface which
    converts fairseq2 models to their Hugging Face equivalents.
    """

    data: Mapping[str, object]
    """
    Configuration data.

    Each key in this mapping must correspond to an attribute of the actual
    configuration class in Hugging Face Transformers.
    """

    kls_name: str
    """
    Name of the configuration class in Hugging Face Transformers. For instance,
    Qwen3Config or LlamaConfig.
    """

    arch: str | Sequence[str]
    """
    Architecture(s) of the model as defined in Hugging Face Transformers. For
    instance, Qwen3ForCausalLM, LlamaForCausalLM.
    """
class HuggingFaceConverter(ABC):
    """
    Converts the state dict and configuration of a fairseq2 model to its Hugging
    Face Transformers equivalent.

    Model authors must register their converter implementations with fairseq2
    as part of library initialization as shown below:

    .. code:: python

        from fairseq2.models.hg import HuggingFaceConverter
        from fairseq2.runtime.dependency import DependencyContainer, register_model_family


        class MyModelConverter(HuggingFaceConverter):
            ...


        def register_my_model(container: DependencyContainer) -> None:
            register_model_family(container, name="my_model_family", ...)

            container.register_type(
                HuggingFaceConverter, MyModelConverter, key="my_model_family",
            )
    """

    @abstractmethod
    def to_hg_config(self, config: object) -> HuggingFaceConfig:
        """
        Converts the specified fairseq2 model configuration to its Hugging Face
        Transformers equivalent.

        :raises TypeError: ``config`` is not of valid type. The expected type
            is one registered as part of the :class:`ModelFamily`.
        """

    @abstractmethod
    def to_hg_state_dict(
        self, state_dict: dict[str, object], config: object
    ) -> dict[str, object]:
        """
        Converts the specified fairseq2 state dict to its Hugging Face
        Transformers equivalent.

        ``config`` is the fairseq2 model configuration and can be used to
        adjust the converted state dict when necessary.

        :raises TypeError: ``config`` is not of valid type. The expected type
            is one registered as part of the :class:`ModelFamily`.
        """
# TODO: Remove in v0.9
@final
class _LegacyHuggingFaceConverter(HuggingFaceConverter):
    """
    Backward-compatibility shim that holds a legacy :class:`HuggingFaceExporter`
    so it can be registered under the new :class:`HuggingFaceConverter` key.

    Neither conversion method is implemented; both raise
    :class:`NotSupportedError` since the legacy exporter does not expose
    separate config/state-dict conversion steps.
    """

    def __init__(self, exporter: HuggingFaceExporter[Any]) -> None:
        # Kept only so the legacy exporter remains reachable via the resolver.
        self._exporter = exporter

    @override
    def to_hg_config(self, config: object) -> HuggingFaceConfig:
        raise NotSupportedError()

    @override
    def to_hg_state_dict(
        self, state_dict: dict[str, object], config: object
    ) -> dict[str, object]:
        raise NotSupportedError()
def get_hugging_face_converter(family_name: str) -> HuggingFaceConverter:
    """
    Returns the :class:`HuggingFaceConverter` of the specified model family.

    :param family_name: Name of the model family whose converter to look up.

    :raises NotSupportedError: The model family does not support Hugging Face
        conversion.
    """
    resolver = get_dependency_resolver()

    # Converters are registered per-family; a missing registration means the
    # family's authors never provided Hugging Face conversion support.
    hg_converter = resolver.maybe_resolve(HuggingFaceConverter, key=family_name)
    if hg_converter is None:
        raise NotSupportedError(
            f"{family_name} model family does not support Hugging Face conversion."
        )

    return hg_converter
def save_hugging_face_model(
    save_dir: Path, state_dict: dict[str, object], config: HuggingFaceConfig
) -> None:
    """
    Saves the state dict and configuration of a Hugging Face Transformers model
    to the specified directory.

    :param save_dir: Directory to write the configuration and Safetensors
        checkpoint into.
    :param state_dict: Model state dict; every value must be a tensor.
    :param config: Converted Hugging Face configuration of the model.

    :raises TypeError: ``config.kls_name`` does not correspond to the expected
        :class:`PretrainedConfig` subclass of the Hugging Face model.
    :raises TypeError: ``state_dict`` contains non-tensor values which is not
        supported in Safetensors format.
    :raises ValueError: A key in ``config`` does not have a corresponding
        attribute in Hugging Face model configuration class.
    :raises OSError: The state dict or configuration cannot be saved to the
        file system.
    """
    try:
        config_kls = getattr(transformers, config.kls_name)
    except AttributeError:
        raise TypeError(f"`transformers.{config.kls_name}` is not a type.") from None

    if not issubclass(config_kls, PretrainedConfig):
        raise TypeError(
            f"`transformers.{config.kls_name}` is expected to be a subclass of `{PretrainedConfig}`."
        )

    # Start from the class defaults and overwrite only the attributes that the
    # fairseq2 converter provided; unknown keys indicate a converter bug.
    native_config = config_kls()

    for key, value in config.data.items():
        if not hasattr(native_config, key):
            raise ValueError(
                f"`transformers.{config.kls_name}` does not have an attribute named {key}."
            )

        setattr(native_config, key, value)

    # `architectures` is always stored as a list in Hugging Face configs.
    arch = config.arch

    setattr(native_config, "architectures", [arch] if isinstance(arch, str) else arch)

    # Validate the state dict BEFORE writing anything to the file system so a
    # bad input does not leave a partially-written output directory behind.
    for key, value in state_dict.items():
        if not isinstance(value, Tensor):
            raise TypeError(
                f"`state_dict[{key}]` must be of type `{Tensor}`, but is of type `{type(value)}` instead."
            )

    native_config.save_pretrained(save_dir)

    tensors = cast(dict[str, Tensor], state_dict)

    huggingface_hub.save_torch_state_dict(tensors, save_dir)