apply_llama3_rope_scaling
                        Apply Llama3-style RoPE scaling
apply_rotary_emb_s3     Apply rotary position embeddings
apply_rotary_pos_emb    Apply rotary position embeddings to Q and K
attention_block         Attention block for perceiver
basic_res_block         Basic residual block for FCM
basic_transformer_block
                        Basic transformer block
cam_dense_tdnn_block    CAM Dense TDNN Block (multiple layers with
                        dense connections)
cam_dense_tdnn_layer    CAM Dense TDNN Layer
cam_layer               CAM (Context-Aware Masking) Layer
campplus                CAMPPlus speaker encoder
causal_block1d          Causal Block 1D - CausalConv + LayerNorm + Mish
causal_cfm              Causal Conditional Flow Matching
causal_conv1d           Causal Conv1d - pads left only
causal_masked_diff_xvec
                        Causal Masked Diff with Xvector
causal_resnet_block1d   Causal ResNet Block 1D
cfm_attention           Self-attention for transformer block
cfm_estimator           CFM Estimator (ConditionalDecoder)
chatterbox              Create (and load) a Chatterbox TTS model
chatterbox_gc_options   Recommended torch garbage-collection settings
                        for chatterbox
compute_rope_frequencies
                        Compute rotary position embedding frequencies
compute_ve_mel          Compute mel spectrogram for voice encoder
conformer_encoder_layer
                        Conformer Encoder Layer
conv_rnn_f0_predictor   Convolutional RNN F0 Predictor
create_kv_cache         Create pre-allocated KV cache
create_mel_filterbank   Create mel filterbank
create_voice_embedding
                        Create voice embedding from reference audio
dense_layer             Dense layer for final embedding
download_chatterbox_models
                        Download Chatterbox Models from HuggingFace
download_chatterbox_turbo_models
                        Download Chatterbox Turbo Models from
                        HuggingFace
drop_invalid_tokens     Drop invalid speech tokens
espnet_rel_positional_encoding
                        Sinusoidal positional encoding (Espnet
                        RelPositionalEncoding)
fcm_module              Factorized Convolutional Module (FCM)
feed_forward            Feed-forward network for transformer; matches
                        diffusers FeedForward: net = [GELU(proj),
                        Dropout, Linear]
fsmn_multi_head_attention
                        FSMN Multi-Head Attention
fsq_codebook            FSQ Codebook module
fsq_vector_quantization
                        FSQ Vector Quantization wrapper
gelu_with_proj          GELU activation with projection (matches
                        diffusers GELU structure)
generate                Generate speech from text
generate_batch          Generate speech for several texts with one
                        batched synthesis pass
get_conv_padding        Get padding for convolution
get_traced_layers       Get or create traced layers for cached
                        inference
gpt2_attention          GPT-2 Attention (combined QKV projection)
gpt2_block              GPT-2 Transformer Block
gpt2_config             GPT-2 Model Configuration
gpt2_layer_norm         GPT-2 Layer Normalization
gpt2_mlp                GPT-2 MLP (GELU activation)
gpt2_model              GPT-2 Model (transformer backbone)
hifigan_resblock        HiFiGAN Residual Block
hift_generator          HiFTNet Generator
init_cache_from_first   Initialize cache with first token K/V values
integrated_loudness     Integrated loudness (ITU-R BS.1770-4)
is_loaded               Check if model is loaded
learned_position_embeddings
                        Learned position embeddings module
linear_no_subsampling   Linear No Subsampling layer
llama_attention         Llama attention module
llama_config_520m       Create Llama 520M configuration
llama_decoder_layer     Llama decoder layer
llama_mlp               Llama MLP module
llama_model             Llama model (decoder only)
llama_rms_norm          RMS Normalization module
load_chatterbox         Load Chatterbox model weights
load_chatterbox_turbo   Load Chatterbox Turbo model weights
load_conformer_encoder_weights
                        Load Conformer Encoder weights
load_llama_weights      Load weights from safetensors into Llama model
load_t3_turbo_weights   Load T3 turbo weights from safetensors
load_t3_weights         Load T3 weights from safetensors
load_tokenizer          Load tokenizer from JSON file (internal)
load_voice_embedding    Load a voice embedding from disk
load_voice_encoder_weights
                        Load voice encoder weights from safetensors
make_non_pad_mask_s3    Create non-padding mask
make_pad_mask           Create padding mask
mask_to_bias            Convert mask to attention bias
mish_activation         Mish activation
models_available        Check if Models are Downloaded
normalize_loudness      Normalize audio to a target loudness
normalize_tts_text      Normalize text for TTS
pad_audio_for_tokenizer
                        Pad audio to multiple of token rate
perceiver_resampler     Perceiver resampler for conditioning
                        compression
positionwise_feedforward
                        Positionwise Feed Forward
pre_lookahead_layer     Pre-Lookahead Layer
precompute_freqs_cis    Precompute rotary position embedding
                        frequencies
print.chatterbox        Print method for chatterbox
print.chatterbox_gc_options
                        Print method for chatterbox_gc_options
print.voice_embedding   Print method for voice_embedding
punc_norm               Normalize punctuation for TTS
quick_tts               Quick TTS - one-line text-to-speech
read_audio              Read audio file
reflection_pad1d        Reflection padding for 1D (nn_reflection_pad1d
                        equivalent)
rel_position_attention
                        Relative Position Multi-Headed Attention
resample_audio          Resample audio
rotate_half             Rotate half of the tensor for RoPE
s3_audio_encoder        S3 Audio Encoder V2
s3_log_mel_spectrogram
                        Compute log mel spectrogram for S3Tokenizer
s3_multi_head_attention
                        Multi-Head Attention base module
s3_residual_attention_block
                        Residual attention block
s3_tokenizer            S3Tokenizer V2 module
s3_tokenizer_config     S3Tokenizer model configuration
s3gen                   S3Gen Token to Waveform
save_voice_embedding    Save a voice embedding to disk
serve                   Serve chatterbox over HTTP
sine_gen                Sine Generator
sinusoidal_pos_emb      Sinusoidal positional embedding for timesteps
snake_activation        Snake activation function
source_module_hn_nsf    Source Module for Neural Source Filter
statistics_pooling      Statistics pooling
t3_cond                 Create T3 conditioning object
t3_cond_enc             T3 conditioning encoder
t3_cond_to_device       Move T3 conditioning to device
t3_config_english       Create T3 configuration (English-only)
t3_config_turbo         Create T3 turbo configuration (GPT-2 backbone)
t3_inference_traced     T3 inference with JIT tracing (optimized)
t3_model                T3 Token-to-Token TTS model
t3_model_turbo          T3 Token-to-Token TTS model (Turbo variant with
                        GPT-2 backbone)
tdnn_layer              TDNN Layer
timestep_embedding      Timestep embedding MLP
tokenize_text           Encode text to token IDs using BPE
traceable_attention     Traceable attention module with pre-allocated
                        KV cache
traceable_decoder_layer
                        Traceable decoder layer with pre-allocated KV
                        cache
traceable_kv_projector
                        Traceable K/V projection module
traceable_transformer_cached
                        Traceable transformer for cached inference
traceable_transformer_first
                        Traceable transformer for first token (no
                        cache)
transit_layer           Transit layer (channel reduction)
transpose_layer         Transpose layer for use in sequential containers
tts_chunked             Generate speech for long text (the long-form
                        policy layer)
tts_to_file             Generate speech and save to file
turbo_models_available
                        Check if Turbo Models are Downloaded
update_kv_cache         Update KV cache with new K/V values
update_valid_mask       Update valid mask to include new position
upsample_1d             Upsample 1D
upsample_conformer_encoder
                        Upsample Conformer Encoder
upsample_conformer_encoder_full
                        Upsample Conformer Encoder
voice_convert           Convert speech to a target voice
voice_encoder           Voice encoder module
voice_encoder_config    Voice encoder configuration
write_audio             Write audio file
