Index A | B | C | D | E | F | G | H | I | K | L | M | N | O | P | Q | R | S | T | V | W A AdditiveResidualConnect (class in fairseq2.nn) all_gather() (fairseq2.gang.FakeGang method) (fairseq2.gang.Gang method) (fairseq2.gang.ProcessGroupGang method) all_gather_to_list() (fairseq2.gang.FakeGang method) (fairseq2.gang.Gang method) (fairseq2.gang.ProcessGroupGang method) all_reduce() (fairseq2.gang.FakeGang method) (fairseq2.gang.Gang method) (fairseq2.gang.ProcessGroupGang method) all_sum() (in module fairseq2.gang) apply_mask() (in module fairseq2.nn.utils.mask) as_process_group() (fairseq2.gang.FakeGang method) (fairseq2.gang.Gang method) (fairseq2.gang.ProcessGroupGang method) B barrier() (fairseq2.gang.FakeGang method) (fairseq2.gang.Gang method) (fairseq2.gang.ProcessGroupGang method) BatchLayout (class in fairseq2.nn) broadcast() (fairseq2.gang.FakeGang method) (fairseq2.gang.Gang method) (fairseq2.gang.ProcessGroupGang method) broadcast_flag() (in module fairseq2.gang) broadcast_objects() (fairseq2.gang.FakeGang method) (fairseq2.gang.Gang method) (fairseq2.gang.ProcessGroupGang method) C capacity_bytes() (fairseq2.nn.IncrementalState method) (fairseq2.nn.IncrementalStateBag method) capacity_increment (fairseq2.nn.IncrementalStateBag property) close() (fairseq2.gang.FakeGang method) (fairseq2.gang.Gangs method) (fairseq2.gang.ProcessGroupGang method) compiled_max_seq_len (fairseq2.nn.BatchLayout attribute) compute_row_mask() (in module fairseq2.nn.utils.mask) convert_qwen_state_dict() (in module fairseq2.models.qwen) create_decoder() (fairseq2.models.qwen.QwenTokenizer method) create_default_process_group() (fairseq2.gang.ProcessGroupGang class method) create_encoder() (fairseq2.models.qwen.QwenTokenizer method) create_fake_gangs() (in module fairseq2.gang) create_fsdp_gangs() (in module fairseq2.gang) create_gang() (fairseq2.gang.FakeGang method) (fairseq2.gang.Gang method) (fairseq2.gang.ProcessGroupGang method) create_parallel_gangs() (in module fairseq2.gang) create_qwen_model() (in module fairseq2.models.qwen) create_raw_encoder() (fairseq2.models.qwen.QwenTokenizer method) D device (fairseq2.gang.FakeGang property) (fairseq2.gang.Gang property) (fairseq2.gang.ProcessGroupGang property) dp (fairseq2.gang.Gangs attribute) dropout_p (fairseq2.models.qwen.QwenConfig attribute) E Embedding (class in fairseq2.nn) export_qwen() (in module fairseq2.models.qwen) extra_repr() (fairseq2.nn.RMSNorm method) (fairseq2.nn.StandardLayerNorm method) F fairseq2.assets module FakeGang (class in fairseq2.gang) ffn_inner_dim (fairseq2.models.qwen.QwenConfig attribute) forward() (fairseq2.nn.AdditiveResidualConnect method) (fairseq2.nn.Embedding method) (fairseq2.nn.LayerNorm method) (fairseq2.nn.LearnedPositionEncoder method) (fairseq2.nn.Linear method) (fairseq2.nn.PositionEncoder method) (fairseq2.nn.Projection method) (fairseq2.nn.ResidualConnect method) (fairseq2.nn.RMSNorm method) (fairseq2.nn.RotaryEncoder method) (fairseq2.nn.ScaledResidualConnect method) (fairseq2.nn.ShardedEmbedding method) (fairseq2.nn.SinusoidalPositionEncoder method) (fairseq2.nn.StandardEmbedding method) (fairseq2.nn.StandardLayerNorm method) (fairseq2.nn.TiedProjection method) from_embedding() (fairseq2.nn.ShardedEmbedding static method) G Gang (class in fairseq2.gang) Gangs (class in fairseq2.gang) get_qwen_model_hub() (in module fairseq2.models.qwen) get_qwen_shard_specs() (in module fairseq2.models.qwen) get_qwen_tokenizer_hub() (in module fairseq2.models.qwen) H head_dim (fairseq2.models.qwen.QwenConfig attribute) I increment_step_nr() (fairseq2.nn.IncrementalStateBag method) IncrementalState (class in fairseq2.nn) IncrementalStateBag (class in fairseq2.nn) init_scaled_embedding() (in module fairseq2.nn) K k_norm (fairseq2.models.qwen.QwenConfig attribute) L LayerNorm (class in fairseq2.nn) LearnedPositionEncoder (class in fairseq2.nn) Linear (class in fairseq2.nn) load_model() (in module fairseq2.models.hub) M MAX (fairseq2.gang.ReduceOperation attribute) max_num_steps (fairseq2.nn.IncrementalStateBag property) max_seq_len (fairseq2.models.qwen.QwenConfig attribute) (fairseq2.nn.BatchLayout property) maybe_get_state() (fairseq2.nn.IncrementalStateBag method) MEAN (fairseq2.gang.ReduceOperation attribute) MIN (fairseq2.gang.ReduceOperation attribute) min_seq_len (fairseq2.nn.BatchLayout property) model_dim (fairseq2.models.qwen.QwenConfig attribute) ModelArchitectureNotKnownError ModelFamilyNotKnownError ModelHub (class in fairseq2.models.hub) ModelHubAccessor (class in fairseq2.models.hub) ModelNotKnownError module fairseq2.assets N num_attn_heads (fairseq2.models.qwen.QwenConfig attribute) num_key_value_heads (fairseq2.models.qwen.QwenConfig attribute) num_layers (fairseq2.models.qwen.QwenConfig attribute) O of() (fairseq2.nn.BatchLayout static method) P packed (fairseq2.nn.BatchLayout property) padded (fairseq2.nn.BatchLayout property) position_indices (fairseq2.nn.BatchLayout property) PositionEncoder (class in fairseq2.nn) pp (fairseq2.gang.Gangs attribute) ProcessGroupGang (class in fairseq2.gang) PRODUCT (fairseq2.gang.ReduceOperation attribute) Projection (class in fairseq2.nn) Q q_norm (fairseq2.models.qwen.QwenConfig attribute) qkv_proj_bias (fairseq2.models.qwen.QwenConfig attribute) QWEN_FAMILY (in module fairseq2.models.qwen) QwenConfig (class in fairseq2.models.qwen) QwenFactory (class in fairseq2.models.qwen) QwenTokenizer (class in fairseq2.models.qwen) QwenTokenizerConfig (class in fairseq2.models.qwen) R rank (fairseq2.gang.FakeGang property) (fairseq2.gang.Gang property) (fairseq2.gang.ProcessGroupGang property) rdp (fairseq2.gang.Gangs attribute) ReduceOperation (class in fairseq2.gang) reorder() (fairseq2.nn.IncrementalState method) (fairseq2.nn.IncrementalStateBag method) reset_non_persistent_buffers() (fairseq2.nn.RotaryEncoder method) (fairseq2.nn.SinusoidalPositionEncoder method) reset_parameters() (fairseq2.nn.LearnedPositionEncoder method) (fairseq2.nn.Linear method) (fairseq2.nn.RMSNorm method) (fairseq2.nn.RotaryEncoder method) (fairseq2.nn.ShardedEmbedding method) (fairseq2.nn.SinusoidalPositionEncoder method) (fairseq2.nn.StandardEmbedding method) (fairseq2.nn.StandardLayerNorm method) ResidualConnect (class in fairseq2.nn) RMSNorm (class in fairseq2.nn) root (fairseq2.gang.Gangs attribute) rope_theta (fairseq2.models.qwen.QwenConfig attribute) RotaryEncoder (class in fairseq2.nn) S ScaledResidualConnect (class in fairseq2.nn) sdp (fairseq2.gang.Gangs attribute) seq_begin_indices (fairseq2.nn.BatchLayout property) seq_begin_indices_pt (fairseq2.nn.BatchLayout property) seq_lens (fairseq2.nn.BatchLayout property) seq_lens_pt (fairseq2.nn.BatchLayout property) set_state() (fairseq2.nn.IncrementalStateBag method) ShardedEmbedding (class in fairseq2.nn) SinusoidalPositionEncoder (class in fairseq2.nn) size (fairseq2.gang.FakeGang property) (fairseq2.gang.Gang property) (fairseq2.gang.ProcessGroupGang property) size_bytes() (fairseq2.nn.IncrementalState method) (fairseq2.nn.IncrementalStateBag method) StandardEmbedding (class in fairseq2.nn) StandardLayerNorm (class in fairseq2.nn) step_nr (fairseq2.nn.IncrementalStateBag property) SUM (fairseq2.gang.ReduceOperation attribute) supports_process_group (fairseq2.gang.FakeGang property) (fairseq2.gang.Gang property) (fairseq2.gang.ProcessGroupGang property) T tied_embeddings (fairseq2.models.qwen.QwenConfig attribute) TiedProjection (class in fairseq2.nn) to_embedding() (fairseq2.nn.ShardedEmbedding method) tp (fairseq2.gang.Gangs attribute) V vocab_info (fairseq2.models.qwen.QwenTokenizer property) vocab_size (fairseq2.models.qwen.QwenConfig attribute) W width (fairseq2.nn.BatchLayout property)