Index A | B | C | D | E | F | G | H | I | K | L | M | N | O | P | Q | R | S | T | U | V | W | Y A activation (fairseq2.models.gemma4.Gemma4SubsampleConvProjection attribute) add_error() (fairseq2.utils.validation.ValidationResult method) add_sub_result() (fairseq2.utils.validation.ValidationResult method) AdditiveResidualConnect (class in fairseq2.nn) all_gather() (fairseq2.gang.FakeGang method) (fairseq2.gang.Gang method) (fairseq2.gang.ProcessGroupGang method) all_gather_to_list() (fairseq2.gang.FakeGang method) (fairseq2.gang.Gang method) (fairseq2.gang.ProcessGroupGang method) all_reduce() (fairseq2.gang.FakeGang method) (fairseq2.gang.Gang method) (fairseq2.gang.ProcessGroupGang method) all_sum() (in module fairseq2.gang) apply_ac_to_gemma4() (in module fairseq2.models.gemma4) apply_chat_template() (fairseq2.models.gemma4.Gemma4Tokenizer method) apply_fsdp_to_gemma4() (in module fairseq2.models.gemma4) apply_mask() (in module fairseq2.nn.utils.mask) arch (fairseq2.models.hg.HuggingFaceConfig attribute) as_process_group() (fairseq2.gang.FakeGang method) (fairseq2.gang.Gang method) (fairseq2.gang.ProcessGroupGang method) attention_chunk_size (fairseq2.models.gemma4.Gemma4AudioConfig attribute) attention_context_left (fairseq2.models.gemma4.Gemma4AudioConfig attribute) attention_context_right (fairseq2.models.gemma4.Gemma4AudioConfig attribute) attention_k_eq_v (fairseq2.models.gemma4.Gemma4Config attribute) attention_logit_cap (fairseq2.models.gemma4.Gemma4AudioConfig attribute) audio_config (fairseq2.models.gemma4.Gemma4Config attribute) audio_embedder (fairseq2.models.gemma4.Gemma4Model attribute) audio_token_id (fairseq2.models.gemma4.Gemma4Config attribute) (fairseq2.models.gemma4.Gemma4Frontend attribute) audio_tower (fairseq2.models.gemma4.Gemma4Model attribute) B barrier() (fairseq2.gang.FakeGang method) (fairseq2.gang.Gang method) (fairseq2.gang.ProcessGroupGang method) BatchLayout (class in fairseq2.nn) beta_fast (fairseq2.models.olmo.YaRNScaleConfig attribute) beta_slow (fairseq2.models.olmo.YaRNScaleConfig attribute) boh_idx (fairseq2.data.tokenizers.VocabularyInfo attribute) boh_token (fairseq2.models.hg.HgTokenizer property) (fairseq2.models.hg.HgTokenizerConfig attribute) (fairseq2.models.hg.tokenizer.HgTokenizer property) (fairseq2.models.hg.tokenizer.HgTokenizerConfig attribute) bos_idx (fairseq2.data.tokenizers.VocabularyInfo attribute) bos_token (fairseq2.models.hg.HgTokenizer property) (fairseq2.models.hg.HgTokenizerConfig attribute) (fairseq2.models.hg.tokenizer.HgTokenizer property) (fairseq2.models.hg.tokenizer.HgTokenizerConfig attribute) bos_token_id (fairseq2.models.hg.HgTokenizer property) (fairseq2.models.hg.tokenizer.HgTokenizer property) (fairseq2.models.olmo.OLMOConfig attribute) broadcast() (fairseq2.gang.FakeGang method) (fairseq2.gang.Gang method) (fairseq2.gang.ProcessGroupGang method) broadcast_flag() (in module fairseq2.gang) broadcast_objects() (fairseq2.gang.FakeGang method) (fairseq2.gang.Gang method) (fairseq2.gang.ProcessGroupGang method) C capacity_bytes() (fairseq2.nn.IncrementalState method) (fairseq2.nn.IncrementalStateBag method) capacity_increment (fairseq2.nn.IncrementalStateBag property) chat_template (fairseq2.models.gemma4.Gemma4Tokenizer property) (fairseq2.models.hg.HgTokenizer property) (fairseq2.models.hg.tokenizer.HgTokenizer property) clip_grad_norm() (fairseq2.nn.data_parallel.DataParallelFacade method) close() (fairseq2.gang.FakeGang method) (fairseq2.gang.Gangs method) (fairseq2.gang.ProcessGroupGang method) compile_layerwise() (fairseq2.models.gemma4.Gemma4Decoder method) compile_loss() (fairseq2.models.gemma4.Gemma4Model method) compiled_max_seq_len (fairseq2.nn.BatchLayout attribute) compute_fused_loss() (fairseq2.models.gemma4.Gemma4Model method) compute_loss() (fairseq2.models.gemma4.Gemma4Model method) compute_row_mask() (in module fairseq2.nn.utils.mask) conv (fairseq2.models.gemma4.Gemma4ConformerBlock attribute) conv_0 (fairseq2.models.gemma4.Gemma4SubsampleConvProjection attribute) conv_1 (fairseq2.models.gemma4.Gemma4SubsampleConvProjection attribute) conv_kernel_size (fairseq2.models.gemma4.Gemma4AudioConfig attribute) conv_layer_norm (fairseq2.models.gemma4.Gemma4ConformerBlock attribute) convert_gemma4_state_dict() (in module fairseq2.models.gemma4) convert_tokens_to_ids() (fairseq2.models.hg.HgTokenizer method) (fairseq2.models.hg.tokenizer.HgTokenizer method) CorruptModelCheckpointError (class in fairseq2.model_checkpoint) create_audio_embedder() (fairseq2.models.gemma4.Gemma4Factory method) create_audio_tower() (fairseq2.models.gemma4.Gemma4Factory method) create_decoder() (fairseq2.data.tokenizers.Tokenizer method) (fairseq2.models.gemma4.Gemma4Factory method) (fairseq2.models.gemma4.Gemma4Tokenizer method) (fairseq2.models.hg.HgTokenizer method) (fairseq2.models.hg.tokenizer.HgTokenizer method) (fairseq2.models.olmo.OLMOTokenizer method) (fairseq2.models.qwen.QwenTokenizer method) create_decoder_frontend() (fairseq2.models.gemma4.Gemma4Factory method) create_decoder_layer() (fairseq2.models.gemma4.Gemma4Factory method) create_default_process_group() (fairseq2.gang.ProcessGroupGang class method) create_embedding() (fairseq2.models.gemma4.Gemma4Factory method) create_encoder() (fairseq2.data.tokenizers.Tokenizer method) (fairseq2.models.gemma4.Gemma4Tokenizer method) (fairseq2.models.hg.HgTokenizer method) (fairseq2.models.hg.tokenizer.HgTokenizer method) (fairseq2.models.olmo.OLMOTokenizer method) (fairseq2.models.qwen.QwenTokenizer method) create_fake_gangs() (in module fairseq2.gang) create_final_projection() (fairseq2.models.gemma4.Gemma4Factory method) create_fsdp_gangs() (in module fairseq2.gang) create_gang() (fairseq2.gang.FakeGang method) (fairseq2.gang.Gang method) (fairseq2.gang.ProcessGroupGang method) create_gemma4_model() (in module fairseq2.models.gemma4) create_hg_model() (in module fairseq2.models.hg) (in module fairseq2.models.hg.factory) create_model() (fairseq2.models.gemma4.Gemma4Factory method) (fairseq2.models.hg.factory.HgFactory method) create_parallel_gangs() (in module fairseq2.gang) create_raw_encoder() (fairseq2.data.tokenizers.Tokenizer method) (fairseq2.models.gemma4.Gemma4Tokenizer method) (fairseq2.models.hg.HgTokenizer method) (fairseq2.models.hg.tokenizer.HgTokenizer method) (fairseq2.models.olmo.OLMOTokenizer method) (fairseq2.models.qwen.QwenTokenizer method) CudaContext (class in fairseq2.device) custom_model_class (fairseq2.models.hg.config.HuggingFaceModelConfig attribute) (fairseq2.models.hg.HuggingFaceModelConfig attribute) custom_processor_class (fairseq2.models.hg.config.HuggingFaceModelConfig attribute) (fairseq2.models.hg.HuggingFaceModelConfig attribute) D data (fairseq2.models.hg.HuggingFaceConfig attribute) DataParallelFacade (class in fairseq2.nn.data_parallel) DatasetHub (class in fairseq2.datasets.hub) DatasetHubAccessor (class in fairseq2.datasets.hub) DataTypeContext (class in fairseq2.data_type) decode() (fairseq2.models.hg.HgTokenizer method) (fairseq2.models.hg.tokenizer.HgTokenizer method) decode_from_tokens() (fairseq2.data.tokenizers.TokenDecoder method) decoder (fairseq2.models.gemma4.Gemma4Model attribute) decoder_frontend (fairseq2.models.gemma4.Gemma4Model attribute) detect_default_device() (in module fairseq2.device) device (fairseq2.gang.FakeGang property) (fairseq2.gang.Gang property) (fairseq2.gang.Gangs property) (fairseq2.gang.ProcessGroupGang property) (fairseq2.models.hg.config.HuggingFaceModelConfig attribute) (fairseq2.models.hg.HuggingFaceModelConfig attribute) device_count() (fairseq2.device.CudaContext method) DeviceContext (class in fairseq2.device) dp (fairseq2.gang.Gangs attribute) dropout_p (fairseq2.models.gemma4.Gemma4Config attribute) (fairseq2.models.llama.LLaMAConfig attribute) (fairseq2.models.olmo.OLMOConfig attribute) (fairseq2.models.qwen.QwenConfig attribute) dtype (fairseq2.models.hg.config.HuggingFaceModelConfig attribute) (fairseq2.models.hg.HuggingFaceModelConfig attribute) E embed (fairseq2.models.gemma4.Gemma4Frontend attribute) embed_tokens_per_layer (fairseq2.models.gemma4.Gemma4Frontend attribute) Embedding (class in fairseq2.nn) embedding_pre_projection_norm (fairseq2.models.gemma4.Gemma4MultimodalAudioEmbedder attribute) embedding_projection (fairseq2.models.gemma4.Gemma4MultimodalAudioEmbedder attribute) enable_gradient_checkpointing (fairseq2.models.hg.config.HuggingFaceModelConfig attribute) (fairseq2.models.hg.HuggingFaceModelConfig attribute) enable_moe (fairseq2.models.gemma4.Gemma4Config attribute) (fairseq2.models.gemma4.Gemma4DecoderLayer attribute) enable_ple (fairseq2.models.gemma4.Gemma4DecoderLayer attribute) encode() (fairseq2.models.hg.HgTokenizer method) (fairseq2.models.hg.tokenizer.HgTokenizer method) encode_as_tokens() (fairseq2.data.tokenizers.TokenEncoder method) encoder (fairseq2.models.gemma4.Gemma4AudioTower attribute) eoh_idx (fairseq2.data.tokenizers.VocabularyInfo attribute) eoh_token (fairseq2.models.hg.HgTokenizer property) (fairseq2.models.hg.HgTokenizerConfig attribute) (fairseq2.models.hg.tokenizer.HgTokenizer property) (fairseq2.models.hg.tokenizer.HgTokenizerConfig attribute) eos_idx (fairseq2.data.tokenizers.VocabularyInfo attribute) eos_token (fairseq2.models.hg.HgTokenizer property) (fairseq2.models.hg.HgTokenizerConfig attribute) (fairseq2.models.hg.tokenizer.HgTokenizer property) (fairseq2.models.hg.tokenizer.HgTokenizerConfig attribute) eos_token_id (fairseq2.models.hg.HgTokenizer property) (fairseq2.models.hg.tokenizer.HgTokenizer property) (fairseq2.models.olmo.OLMOConfig attribute) errors (fairseq2.utils.validation.ValidationResult property) experts (fairseq2.models.gemma4.Gemma4DecoderLayer attribute) extra_repr() (fairseq2.nn.RMSNorm method) (fairseq2.nn.StandardLayerNorm method) F fairseq2 module fairseq2.assets module fairseq2.data_type module fairseq2.device module fairseq2.gang module fairseq2.model_checkpoint module fairseq2.models module fairseq2.models.hg.api module fairseq2.models.hg.config module fairseq2.models.hg.factory module fairseq2.models.hg.hub module fairseq2.models.hg.tokenizer module fairseq2.recipe.composition module fairseq2.recipe.optim module fairseq2.utils.validation module FakeGang (class in fairseq2.gang) ffn (fairseq2.models.gemma4.Gemma4DecoderLayer attribute) ffn1 (fairseq2.models.gemma4.Gemma4ConformerBlock attribute) ffn1_layer_norm (fairseq2.models.gemma4.Gemma4ConformerBlock attribute) ffn1_post_layer_norm (fairseq2.models.gemma4.Gemma4ConformerBlock attribute) ffn2 (fairseq2.models.gemma4.Gemma4ConformerBlock attribute) ffn2_layer_norm (fairseq2.models.gemma4.Gemma4ConformerBlock attribute) ffn2_post_layer_norm (fairseq2.models.gemma4.Gemma4ConformerBlock attribute) ffn_inner_dim (fairseq2.models.gemma4.Gemma4Config attribute) (fairseq2.models.llama.LLaMAConfig attribute) (fairseq2.models.olmo.OLMOConfig attribute) (fairseq2.models.qwen.QwenConfig attribute) ffn_inner_dim_multiple_of (fairseq2.models.llama.LLaMAConfig attribute) ffn_inner_dim_multiplier (fairseq2.models.llama.LLaMAConfig attribute) ffn_inner_dim_scale (fairseq2.models.llama.LLaMAConfig attribute) final_logit_soft_cap (fairseq2.models.gemma4.Gemma4Config attribute) final_logit_softcapping (fairseq2.models.gemma4.Gemma4Config property) final_proj (fairseq2.models.gemma4.Gemma4Model attribute) forward() (fairseq2.models.gemma4.Gemma4Attention method) (fairseq2.models.gemma4.Gemma4AudioTower method) (fairseq2.models.gemma4.Gemma4ConformerAttention method) (fairseq2.models.gemma4.Gemma4ConformerBlock method) (fairseq2.models.gemma4.Gemma4ConformerEncoder method) (fairseq2.models.gemma4.Gemma4Decoder method) (fairseq2.models.gemma4.Gemma4DecoderLayer method) (fairseq2.models.gemma4.Gemma4Experts method) (fairseq2.models.gemma4.Gemma4Frontend method) (fairseq2.models.gemma4.Gemma4Model method) (fairseq2.models.gemma4.Gemma4MultimodalAudioEmbedder method) (fairseq2.models.gemma4.Gemma4Router method) (fairseq2.models.gemma4.Gemma4SubsampleConvProjection method) (fairseq2.nn.AdditiveResidualConnect method) (fairseq2.nn.Embedding method) (fairseq2.nn.LayerNorm method) (fairseq2.nn.LearnedPositionEncoder method) (fairseq2.nn.Linear method) (fairseq2.nn.PositionEncoder method) (fairseq2.nn.Projection method) (fairseq2.nn.ResidualConnect method) (fairseq2.nn.RMSNorm method) (fairseq2.nn.RotaryEncoder method) (fairseq2.nn.ScaledResidualConnect method) (fairseq2.nn.ShardedEmbedding method) (fairseq2.nn.SinusoidalPositionEncoder method) (fairseq2.nn.StandardEmbedding method) (fairseq2.nn.StandardLayerNorm method) (fairseq2.nn.TiedProjection method) from_embedding() (fairseq2.nn.ShardedEmbedding static method) G Gang (class in fairseq2.gang) GangContext (class in fairseq2.gang) GangError (class in fairseq2.gang) Gangs (class in fairseq2.gang) gangs (fairseq2.model_checkpoint.ModelCheckpointLoadOptions attribute) GEMMA4_FAMILY (in module fairseq2.models.gemma4) Gemma4Attention (class in fairseq2.models.gemma4) Gemma4AudioConfig (class in fairseq2.models.gemma4) Gemma4AudioTower (class in fairseq2.models.gemma4) Gemma4Config (class in fairseq2.models.gemma4) Gemma4ConformerAttention (class in fairseq2.models.gemma4) Gemma4ConformerBlock (class in fairseq2.models.gemma4) Gemma4ConformerEncoder (class in fairseq2.models.gemma4) Gemma4Decoder (class in fairseq2.models.gemma4) Gemma4DecoderLayer (class in fairseq2.models.gemma4) Gemma4Experts (class in fairseq2.models.gemma4) Gemma4Factory (class in fairseq2.models.gemma4) Gemma4Frontend (class in fairseq2.models.gemma4) Gemma4Model (class in fairseq2.models.gemma4) Gemma4MultimodalAudioEmbedder (class in fairseq2.models.gemma4) Gemma4Router (class in fairseq2.models.gemma4) Gemma4SubsampleConvProjection (class in fairseq2.models.gemma4) Gemma4Tokenizer (class in fairseq2.models.gemma4) get_current_device() (fairseq2.device.DeviceContext method) (in module fairseq2.device) get_current_dtype() (fairseq2.data_type.DataTypeContext method) (in module fairseq2.data_type) get_current_gangs() (fairseq2.gang.GangContext method) (in module fairseq2.gang) get_data_parallel_facade() (in module fairseq2.nn.data_parallel) get_dataset_config() (fairseq2.datasets.hub.DatasetHub method) get_default_gangs() (in module fairseq2.gang) get_device_properties() (fairseq2.device.CudaContext method) get_gemma4_26b_a4b_config() (in module fairseq2.models.gemma4) get_gemma4_31b_config() (in module fairseq2.models.gemma4) get_gemma4_e2b_config() (in module fairseq2.models.gemma4) get_gemma4_e4b_config() (in module fairseq2.models.gemma4) get_gemma4_model_hub (in module fairseq2.models.gemma4) get_gemma4_tokenizer_hub (in module fairseq2.models.gemma4) get_hg_model_hub() (in module fairseq2.models.hg) get_hg_tokenizer_hub() (in module fairseq2.models.hg) get_hugging_face_converter() (in module fairseq2.models.hg) get_model_checkpoint_loader() (in module fairseq2.model_checkpoint) get_olmo_model_hub (in module fairseq2.models.olmo) get_qwen_tokenizer_hub() (in module fairseq2.models.qwen) get_shard_dims() (fairseq2.nn.ShardedEmbedding method) get_tokenizer_config() (fairseq2.data.tokenizers.hub.TokenizerHub method) global_head_dim (fairseq2.models.gemma4.Gemma4Config attribute) gradient_clipping (fairseq2.models.gemma4.Gemma4AudioConfig attribute) (fairseq2.models.gemma4.Gemma4ConformerBlock attribute) H has_error() (fairseq2.utils.validation.ValidationResult method) has_ple (fairseq2.models.gemma4.Gemma4Config property) head_dim (fairseq2.models.gemma4.Gemma4Attention attribute) (fairseq2.models.gemma4.Gemma4Config attribute) (fairseq2.models.gemma4.Gemma4ConformerAttention attribute) (fairseq2.models.qwen.QwenConfig attribute) hf_name (fairseq2.models.hg.config.HuggingFaceModelConfig attribute) (fairseq2.models.hg.HuggingFaceModelConfig attribute) HgFactory (class in fairseq2.models.hg.factory) HgTokenizer (class in fairseq2.models.hg) (class in fairseq2.models.hg.tokenizer) HgTokenizerConfig (class in fairseq2.models.hg) (class in fairseq2.models.hg.tokenizer) hidden_activation (fairseq2.models.gemma4.Gemma4Config attribute) hidden_size (fairseq2.models.gemma4.Gemma4AudioConfig attribute) hidden_size_per_layer_input (fairseq2.models.gemma4.Gemma4Config attribute) HuggingFaceConfig (class in fairseq2.models.hg) HuggingFaceConverter (class in fairseq2.models.hg) HuggingFaceModelConfig (class in fairseq2.models.hg) (class in fairseq2.models.hg.config) HuggingFaceModelError, [1] I impl (fairseq2.models.llama.LLaMATokenizerConfig attribute) increment_step_nr() (fairseq2.nn.IncrementalStateBag method) IncrementalState (class in fairseq2.nn) IncrementalStateBag (class in fairseq2.nn) init_scaled_embedding() (in module fairseq2.nn) init_std (fairseq2.models.gemma4.Gemma4Config attribute) (fairseq2.models.llama.LLaMAConfig attribute) (fairseq2.models.olmo.OLMOConfig attribute) init_std_scale (fairseq2.models.llama.LLaMAConfig attribute) (fairseq2.models.olmo.OLMOConfig attribute) input_feat_size (fairseq2.models.gemma4.Gemma4AudioConfig attribute) input_layernorm (fairseq2.models.gemma4.Gemma4DecoderLayer attribute) is_available() (fairseq2.device.CudaContext method) is_kv_consumer (fairseq2.models.gemma4.Gemma4Attention attribute) iter_cards() (fairseq2.data.tokenizers.hub.TokenizerHub method) (fairseq2.datasets.hub.DatasetHub method) K k_eq_v (fairseq2.models.gemma4.Gemma4Attention attribute) k_norm (fairseq2.models.qwen.QwenConfig attribute) k_proj (fairseq2.models.gemma4.Gemma4ConformerAttention attribute) kls_name (fairseq2.models.hg.HuggingFaceConfig attribute) L layer_norm (fairseq2.models.gemma4.Gemma4ConformerBlock attribute) (fairseq2.models.gemma4.Gemma4Decoder attribute) layer_types (fairseq2.models.gemma4.Gemma4Config attribute) (fairseq2.models.olmo.OLMOConfig attribute) LayerNorm (class in fairseq2.nn) layers (fairseq2.models.gemma4.Gemma4ConformerEncoder attribute) (fairseq2.models.gemma4.Gemma4Decoder attribute) lazy_load() (fairseq2.model_checkpoint.ModelCheckpointLoader method) LearnedPositionEncoder (class in fairseq2.nn) Linear (class in fairseq2.nn) LLaMAConfig (class in fairseq2.models.llama) LLaMATokenizerConfig (class in fairseq2.models.llama) load_causal_lm() (in module fairseq2.models.hg) (in module fairseq2.models.hg.api) load_custom_tokenizer() (fairseq2.data.tokenizers.hub.TokenizerHub method) load_gemma4_tokenizer() (in module fairseq2.models.gemma4) load_hg_model_simple() (in module fairseq2.models.hg) (in module fairseq2.models.hg.api) load_hg_tokenizer() (in module fairseq2.models.hg) (in module fairseq2.models.hg.tokenizer) load_hg_tokenizer_simple() (in module fairseq2.models.hg) (in module fairseq2.models.hg.api) load_kwargs (fairseq2.models.hg.config.HuggingFaceModelConfig attribute) (fairseq2.models.hg.HuggingFaceModelConfig attribute) load_model() (in module fairseq2.models) (in module fairseq2.models.hub) load_multimodal_model() (in module fairseq2.models.hg) (in module fairseq2.models.hg.api) load_olmo_tokenizer() (in module fairseq2.models.olmo) load_seq2seq_lm() (in module fairseq2.models.hg) (in module fairseq2.models.hg.api) load_state_dict() (fairseq2.nn.data_parallel.DataParallelFacade method) load_tokenizer() (fairseq2.data.tokenizers.hub.TokenizerHub method) (in module fairseq2.data.tokenizers.hub) LocalRankOutOfRangeError (class in fairseq2.device) M MAX (fairseq2.gang.ReduceOperation attribute) max_num_steps (fairseq2.nn.IncrementalStateBag property) max_seq_len (fairseq2.models.gemma4.Gemma4Config attribute) (fairseq2.models.llama.LLaMAConfig attribute) (fairseq2.models.olmo.OLMOConfig attribute) (fairseq2.models.qwen.QwenConfig attribute) (fairseq2.nn.BatchLayout property) maybe_get_state() (fairseq2.nn.IncrementalStateBag method) maybe_raise_param_group_length_error() (in module fairseq2.recipe.optim) MEAN (fairseq2.gang.ReduceOperation attribute) memory_stats() (fairseq2.device.CudaContext method) MIN (fairseq2.gang.ReduceOperation attribute) min_seq_len (fairseq2.nn.BatchLayout property) mmap (fairseq2.model_checkpoint.ModelCheckpointLoadOptions attribute) model (fairseq2.models.hg.HgTokenizer property) (fairseq2.models.hg.tokenizer.HgTokenizer property) model_dim (fairseq2.models.gemma4.Gemma4Config attribute) (fairseq2.models.gemma4.Gemma4Experts attribute) (fairseq2.models.gemma4.Gemma4Model attribute) (fairseq2.models.gemma4.Gemma4Router attribute) (fairseq2.models.llama.LLaMAConfig attribute) (fairseq2.models.olmo.OLMOConfig attribute) (fairseq2.models.qwen.QwenConfig attribute) model_type (fairseq2.models.hg.config.HuggingFaceModelConfig attribute) (fairseq2.models.hg.HuggingFaceModelConfig attribute) ModelArchitectureNotKnownError (class in fairseq2.models) ModelCheckpointLoader (class in fairseq2.model_checkpoint) ModelCheckpointLoadOptions (class in fairseq2.model_checkpoint) ModelFamilyNotKnownError (class in fairseq2.models) ModelHub (class in fairseq2.models) ModelHubAccessor (class in fairseq2.models) (class in fairseq2.models.hub) ModelNotKnownError (class in fairseq2.models) module fairseq2 fairseq2.assets fairseq2.data_type fairseq2.device fairseq2.gang fairseq2.model_checkpoint fairseq2.models fairseq2.models.hg.api fairseq2.models.hg.config fairseq2.models.hg.factory fairseq2.models.hg.hub fairseq2.models.hg.tokenizer fairseq2.recipe.composition fairseq2.recipe.optim fairseq2.utils.validation moe_intermediate_size (fairseq2.models.gemma4.Gemma4Config attribute) (fairseq2.models.gemma4.Gemma4Experts attribute) mscale (fairseq2.models.olmo.YaRNScaleConfig attribute) mscale_all_dim (fairseq2.models.olmo.YaRNScaleConfig attribute) N no_sync() (fairseq2.nn.data_parallel.DataParallelFacade method) norm_0 (fairseq2.models.gemma4.Gemma4SubsampleConvProjection attribute) norm_1 (fairseq2.models.gemma4.Gemma4SubsampleConvProjection attribute) num_attention_heads (fairseq2.models.gemma4.Gemma4AudioConfig attribute) num_attn_heads (fairseq2.models.gemma4.Gemma4Config attribute) (fairseq2.models.llama.LLaMAConfig attribute) (fairseq2.models.olmo.OLMOConfig attribute) (fairseq2.models.qwen.QwenConfig attribute) num_experts (fairseq2.models.gemma4.Gemma4Config attribute) (fairseq2.models.gemma4.Gemma4Experts attribute) (fairseq2.models.gemma4.Gemma4Router attribute) num_global_key_value_heads (fairseq2.models.gemma4.Gemma4Config attribute) num_heads (fairseq2.models.gemma4.Gemma4Attention attribute) (fairseq2.models.gemma4.Gemma4ConformerAttention attribute) num_hidden_layers (fairseq2.models.gemma4.Gemma4AudioConfig attribute) num_key_value_heads (fairseq2.models.gemma4.Gemma4Attention attribute) (fairseq2.models.gemma4.Gemma4Config attribute) (fairseq2.models.llama.LLaMAConfig attribute) (fairseq2.models.olmo.OLMOConfig attribute) (fairseq2.models.qwen.QwenConfig attribute) num_kv_shared_layers (fairseq2.models.gemma4.Gemma4Config attribute) num_layers (fairseq2.models.gemma4.Gemma4Config attribute) (fairseq2.models.gemma4.Gemma4Frontend attribute) (fairseq2.models.llama.LLaMAConfig attribute) (fairseq2.models.olmo.OLMOConfig attribute) (fairseq2.models.qwen.QwenConfig attribute) num_query_groups (fairseq2.models.gemma4.Gemma4Attention attribute) O ObjectValidator (class in fairseq2.utils.validation) of() (fairseq2.nn.BatchLayout static method) OLMO_FAMILY (in module fairseq2.models.olmo) OLMOConfig (class in fairseq2.models.olmo) OLMOTokenizer (class in fairseq2.models.olmo) OLMOTokenizerConfig (class in fairseq2.models.olmo) open_custom_dataset() (fairseq2.datasets.hub.DatasetHub method) open_dataset() (fairseq2.datasets.hub.DatasetHub method) original_max_seq_len (fairseq2.models.olmo.YaRNScaleConfig attribute) output_proj (fairseq2.models.gemma4.Gemma4AudioTower attribute) (fairseq2.models.gemma4.Gemma4ConformerAttention attribute) output_proj_dims (fairseq2.models.gemma4.Gemma4AudioConfig attribute) P packed (fairseq2.nn.BatchLayout property) pad_idx (fairseq2.data.tokenizers.VocabularyInfo attribute) (fairseq2.models.gemma4.Gemma4Config attribute) (fairseq2.models.gemma4.Gemma4Model attribute) (fairseq2.models.llama.LLaMAConfig attribute) (fairseq2.models.olmo.OLMOConfig attribute) pad_token (fairseq2.models.hg.HgTokenizer property) (fairseq2.models.hg.HgTokenizerConfig attribute) (fairseq2.models.hg.tokenizer.HgTokenizer property) (fairseq2.models.hg.tokenizer.HgTokenizerConfig attribute) pad_token_id (fairseq2.models.hg.HgTokenizer property) (fairseq2.models.hg.tokenizer.HgTokenizer property) padded (fairseq2.nn.BatchLayout property) partial_rotary_factor (fairseq2.models.gemma4.Gemma4Config attribute) per_layer_input_gate (fairseq2.models.gemma4.Gemma4DecoderLayer attribute) per_layer_model_projection (fairseq2.models.gemma4.Gemma4Frontend attribute) per_layer_projection (fairseq2.models.gemma4.Gemma4DecoderLayer attribute) per_layer_projection_norm (fairseq2.models.gemma4.Gemma4Frontend attribute) ple_hidden_dim (fairseq2.models.gemma4.Gemma4Config property) (fairseq2.models.gemma4.Gemma4Frontend attribute) position_indices (fairseq2.nn.BatchLayout property) PositionEncoder (class in fairseq2.nn) post_attention_layernorm (fairseq2.models.gemma4.Gemma4DecoderLayer attribute) post_feedforward_layernorm (fairseq2.models.gemma4.Gemma4DecoderLayer attribute) post_feedforward_layernorm_1 (fairseq2.models.gemma4.Gemma4DecoderLayer attribute) post_feedforward_layernorm_2 (fairseq2.models.gemma4.Gemma4DecoderLayer attribute) post_per_layer_input_norm (fairseq2.models.gemma4.Gemma4DecoderLayer attribute) pp (fairseq2.gang.Gangs attribute) pre_feedforward_layernorm (fairseq2.models.gemma4.Gemma4DecoderLayer attribute) pre_feedforward_layernorm_2 (fairseq2.models.gemma4.Gemma4DecoderLayer attribute) prefix_indices (fairseq2.data.tokenizers.TokenEncoder property) prepare_parameter_groups() (in module fairseq2.recipe.optim) ProcessGroupGang (class in fairseq2.gang) PRODUCT (fairseq2.gang.ReduceOperation attribute) proj (fairseq2.models.gemma4.Gemma4SubsampleConvProjection attribute) Projection (class in fairseq2.nn) Q q_norm (fairseq2.models.qwen.QwenConfig attribute) q_proj (fairseq2.models.gemma4.Gemma4ConformerAttention attribute) qkv_proj_bias (fairseq2.models.qwen.QwenConfig attribute) QWEN_FAMILY (in module fairseq2.models.qwen) QwenConfig (class in fairseq2.models.qwen) QwenTokenizer (class in fairseq2.models.qwen) QwenTokenizerConfig (class in fairseq2.models.qwen) R rank (fairseq2.gang.FakeGang property) (fairseq2.gang.Gang property) (fairseq2.gang.ProcessGroupGang property) raw (fairseq2.models.hg.HgTokenizer property) (fairseq2.models.hg.tokenizer.HgTokenizer property) rdp (fairseq2.gang.Gangs attribute) ReduceOperation (class in fairseq2.gang) register_gemma4_configs() (in module fairseq2.models.gemma4) register_hg_configs() (in module fairseq2.models.hg.config) register_hg_model_class() (in module fairseq2.models.hg) (in module fairseq2.models.hg.factory) register_recipe_assets() (in module fairseq2.recipe.composition) reorder() (fairseq2.nn.IncrementalState method) (fairseq2.nn.IncrementalStateBag method) reset_non_persistent_buffers() (fairseq2.models.gemma4.Gemma4Frontend method) (fairseq2.nn.RotaryEncoder method) (fairseq2.nn.SinusoidalPositionEncoder method) reset_parameters() (fairseq2.nn.LearnedPositionEncoder method) (fairseq2.nn.Linear method) (fairseq2.nn.RMSNorm method) (fairseq2.nn.RotaryEncoder method) (fairseq2.nn.ShardedEmbedding method) (fairseq2.nn.SinusoidalPositionEncoder method) (fairseq2.nn.StandardEmbedding method) (fairseq2.nn.StandardLayerNorm method) reset_peak_memory_stats() (fairseq2.device.CudaContext method) reshard_tensor() (in module fairseq2.model_checkpoint) residual_weight (fairseq2.models.gemma4.Gemma4AudioConfig attribute) (fairseq2.models.gemma4.Gemma4ConformerBlock attribute) ResidualConnect (class in fairseq2.nn) restrict (fairseq2.model_checkpoint.ModelCheckpointLoadOptions attribute) result (fairseq2.utils.validation.ValidationError attribute) rms_norm_eps (fairseq2.models.gemma4.Gemma4AudioConfig attribute) (fairseq2.models.gemma4.Gemma4Config attribute) (fairseq2.models.olmo.OLMOConfig attribute) RMSNorm (class in fairseq2.nn) root (fairseq2.gang.Gangs attribute) rope_scale (fairseq2.models.llama.LLaMAConfig attribute) rope_theta (fairseq2.models.gemma4.Gemma4Config attribute) (fairseq2.models.llama.LLaMAConfig attribute) (fairseq2.models.olmo.OLMOConfig attribute) (fairseq2.models.qwen.QwenConfig attribute) rope_theta_global (fairseq2.models.gemma4.Gemma4Config attribute) RotaryEncoder (class in fairseq2.nn) router (fairseq2.models.gemma4.Gemma4DecoderLayer attribute) S save_hugging_face_model() (in module fairseq2.models.hg) scalar_root_size (fairseq2.models.gemma4.Gemma4Router attribute) scale (fairseq2.models.gemma4.Gemma4Frontend attribute) scale_factor (fairseq2.models.olmo.YaRNScaleConfig attribute) ScaledResidualConnect (class in fairseq2.nn) sdp (fairseq2.gang.Gangs attribute) sdpa (fairseq2.models.gemma4.Gemma4ConformerAttention attribute) self_attn (fairseq2.models.gemma4.Gemma4ConformerBlock attribute) (fairseq2.models.gemma4.Gemma4DecoderLayer attribute) self_attn_layer_norm (fairseq2.models.gemma4.Gemma4ConformerBlock attribute) self_attn_post_norm (fairseq2.models.gemma4.Gemma4ConformerBlock attribute) seq_begin_indices (fairseq2.nn.BatchLayout property) seq_begin_indices_pt (fairseq2.nn.BatchLayout property) seq_lens (fairseq2.nn.BatchLayout property) seq_lens_pt (fairseq2.nn.BatchLayout property) set_data_parallel_facade() (in module fairseq2.nn.data_parallel) set_default_gangs() (in module fairseq2.gang) set_device() (fairseq2.device.DeviceContext method) (in module fairseq2.device) set_dtype() (fairseq2.data_type.DataTypeContext method) (in module fairseq2.data_type) set_gangs() (fairseq2.gang.GangContext method) (in module fairseq2.gang) set_state() (fairseq2.nn.IncrementalStateBag method) shard_embed_dim (fairseq2.models.llama.LLaMAConfig attribute) (fairseq2.models.olmo.OLMOConfig attribute) ShardedEmbedding (class in fairseq2.nn) SinusoidalPositionEncoder (class in fairseq2.nn) size (fairseq2.data.tokenizers.VocabularyInfo attribute) (fairseq2.gang.FakeGang property) (fairseq2.gang.Gang property) (fairseq2.gang.ProcessGroupGang property) size_bytes() (fairseq2.nn.IncrementalState method) (fairseq2.nn.IncrementalStateBag method) sliding_window (fairseq2.models.gemma4.Gemma4Config attribute) (fairseq2.models.olmo.OLMOConfig attribute) split_regex (fairseq2.models.llama.LLaMATokenizerConfig attribute) StandardEmbedding (class in fairseq2.nn) StandardLayerNorm (class in fairseq2.nn) StandardObjectValidator (class in fairseq2.utils.validation) state_dict() (fairseq2.nn.data_parallel.DataParallelFacade method) state_dict_converter (fairseq2.model_checkpoint.ModelCheckpointLoadOptions attribute) step_nr (fairseq2.nn.IncrementalStateBag property) sub_results (fairseq2.utils.validation.ValidationResult property) subsample (fairseq2.models.gemma4.Gemma4AudioTower attribute) subsampling_conv_channels (fairseq2.models.gemma4.Gemma4AudioConfig attribute) suffix_indices (fairseq2.data.tokenizers.TokenEncoder property) SUM (fairseq2.gang.ReduceOperation attribute) summon_full_parameters() (fairseq2.nn.data_parallel.DataParallelFacade method) supports_path() (fairseq2.model_checkpoint.ModelCheckpointLoader method) supports_process_group (fairseq2.gang.FakeGang property) (fairseq2.gang.Gang property) (fairseq2.gang.ProcessGroupGang property) SupportsDeviceTransfer (class in fairseq2.device) T tied_embeddings (fairseq2.models.gemma4.Gemma4Config attribute) (fairseq2.models.llama.LLaMAConfig attribute) (fairseq2.models.olmo.OLMOConfig attribute) (fairseq2.models.qwen.QwenConfig attribute) TiedProjection (class in fairseq2.nn) to() (fairseq2.device.SupportsDeviceTransfer method) to_embedding() (fairseq2.nn.ShardedEmbedding method) to_hg_config() (fairseq2.models.hg.HuggingFaceConverter method) to_hg_state_dict() (fairseq2.models.hg.HuggingFaceConverter method) TokenDecoder (class in fairseq2.data.tokenizers) TokenEncoder (class in fairseq2.data.tokenizers) Tokenizer (class in fairseq2.data.tokenizers) TokenizerHub (class in fairseq2.data.tokenizers.hub) TokenizerHubAccessor (class in fairseq2.data.tokenizers.hub) top_k (fairseq2.models.gemma4.Gemma4Router attribute) top_k_experts (fairseq2.models.gemma4.Gemma4Config attribute) tp (fairseq2.gang.Gangs attribute) truncate (fairseq2.models.olmo.YaRNScaleConfig attribute) trust_remote_code (fairseq2.models.hg.config.HuggingFaceModelConfig attribute) (fairseq2.models.hg.HuggingFaceModelConfig attribute) U unk_idx (fairseq2.data.tokenizers.VocabularyInfo attribute) unk_token (fairseq2.models.hg.HgTokenizer property) (fairseq2.models.hg.HgTokenizerConfig attribute) (fairseq2.models.hg.tokenizer.HgTokenizer property) (fairseq2.models.hg.tokenizer.HgTokenizerConfig attribute) use_double_wide_mlp (fairseq2.models.gemma4.Gemma4Config attribute) use_eot (fairseq2.models.llama.LLaMATokenizerConfig attribute) use_im_end (fairseq2.models.olmo.OLMOTokenizerConfig attribute) (fairseq2.models.qwen.QwenTokenizerConfig attribute) use_processor (fairseq2.models.hg.config.HuggingFaceModelConfig attribute) (fairseq2.models.hg.HuggingFaceModelConfig attribute) use_scaled_rope (fairseq2.models.llama.LLaMAConfig attribute) V v_proj (fairseq2.models.gemma4.Gemma4ConformerAttention attribute) Validatable (class in fairseq2.utils.validation) validate() (fairseq2.utils.validation.ObjectValidator method) (fairseq2.utils.validation.StandardObjectValidator method) (fairseq2.utils.validation.Validatable method) ValidationError ValidationResult (class in fairseq2.utils.validation) vocab_info (fairseq2.data.tokenizers.Tokenizer property) (fairseq2.models.gemma4.Gemma4Tokenizer property) (fairseq2.models.hg.HgTokenizer property) (fairseq2.models.hg.tokenizer.HgTokenizer property) (fairseq2.models.olmo.OLMOTokenizer property) (fairseq2.models.qwen.QwenTokenizer property) vocab_size (fairseq2.models.gemma4.Gemma4Config attribute) (fairseq2.models.llama.LLaMAConfig attribute) (fairseq2.models.olmo.OLMOConfig attribute) (fairseq2.models.qwen.QwenConfig attribute) vocab_size_per_layer_input (fairseq2.models.gemma4.Gemma4Config attribute) VocabularyInfo (class in fairseq2.data.tokenizers) W width (fairseq2.nn.BatchLayout property) Y yarn_scale_config (fairseq2.models.olmo.OLMOConfig attribute) YaRNScaleConfig (class in fairseq2.models.olmo)