Commit 5a4a08a

Merge branch 'main' into kernel_mapping_error_resolve

2 parents: 04e27cb + d08b98b

File tree

1,157 files changed: +30,524 −65,638 lines changed


.github/workflows/get-pr-info.yml

Lines changed: 1 addition & 1 deletion

@@ -40,7 +40,7 @@ on:
         description: "The sha of the merge commit for the pull request (created by GitHub) in the base repository"
         value: ${{ jobs.get-pr-info.outputs.PR_MERGE_COMMIT_SHA }}
       PR_MERGE_COMMIT_BASE_SHA:
-        description: "The sha of the parent commit of the the merge commit on the target branch in the base repository"
+        description: "The sha of the parent commit of the merge commit on the target branch in the base repository"
         value: ${{ jobs.get-pr-info.outputs.PR_MERGE_COMMIT_BASE_SHA }}
       PR_HEAD_COMMIT_DATE:
         description: "The date of the head sha of the pull request branch in the head repository"

.github/workflows/self-comment-ci.yml

Lines changed: 1 addition & 1 deletion

@@ -27,7 +27,7 @@ env:
 jobs:
   get-pr-number:
     name: Get PR number
-    if: ${{ github.event.issue.state == 'open' && contains(fromJSON('["ydshieh", "ArthurZucker", "zucchini-nlp", "molbap", "gante", "LysandreJik", "Cyrilvallez", "Rocketknight1", "SunMarc", "eustlb", "MekkCyber", "vasqu", "ivarflakstad", "stevhliu", "ebezzam", "remi-or", "itazap"]'), github.actor) && (startsWith(github.event.comment.body, 'run-slow') || startsWith(github.event.comment.body, 'run slow') || startsWith(github.event.comment.body, 'run_slow')) }}
+    if: ${{ github.event.issue.state == 'open' && contains(fromJSON('["ydshieh", "ArthurZucker", "zucchini-nlp", "molbap", "gante", "LysandreJik", "Cyrilvallez", "Rocketknight1", "SunMarc", "eustlb", "MekkCyber", "vasqu", "ivarflakstad", "stevhliu", "ebezzam", "remi-or", "itazap", "3outeille"]'), github.actor) && (startsWith(github.event.comment.body, 'run-slow') || startsWith(github.event.comment.body, 'run slow') || startsWith(github.event.comment.body, 'run_slow')) }}
     uses: ./.github/workflows/get-pr-number.yml

   get-pr-info:
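The `if:` expression above is dense. As a rough Python rendering of the same gate (illustrative only, not code from the repository), the condition amounts to:

```python
# Hypothetical Python rendering of the workflow's gating expression, for illustration only.
ALLOWED_ACTORS = {
    "ydshieh", "ArthurZucker", "zucchini-nlp", "molbap", "gante", "LysandreJik",
    "Cyrilvallez", "Rocketknight1", "SunMarc", "eustlb", "MekkCyber", "vasqu",
    "ivarflakstad", "stevhliu", "ebezzam", "remi-or", "itazap", "3outeille",  # "3outeille" is the new entry
}


def should_trigger(issue_state: str, actor: str, comment_body: str) -> bool:
    """Mirror of the workflow condition: open issue, allowlisted actor, run-slow style command."""
    return (
        issue_state == "open"
        and actor in ALLOWED_ACTORS
        and comment_body.startswith(("run-slow", "run slow", "run_slow"))
    )
```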

CONTRIBUTING.md

Lines changed: 2 additions & 2 deletions

@@ -125,9 +125,9 @@ If you're contributing a **vision-language model** (or any multimodal model that
 All new models should use the modular architecture pattern. Create a `modular_<model_name>.py` file using the modular model converter:

 - Use the CLI, [`transformers add-new-model-like`](https://github.com/huggingface/transformers/blob/main/src/transformers/cli/add_new_model_like.py) to generate a modular skeleton and get started
-- All code should be in the modular file if possible. Modeling must be in it, it's better if configuration is in it as well. [Modular guide](./modular_transformers#implementing-a-modular-file) shows a quick way to set up a modular file.
+- All code should be in the modular file if possible. Modeling must be in it, it's better if configuration is in it as well. [Modular guide](./docs/source/en/modular_transformers.md#implementing-a-modular-file) shows a quick way to set up a modular file.
 - Reuse existing patterns from similar models as much as possible
-- You can make the model compatible with inference engines such as vLLM or SGLang, and enable zero-effort integration. See specific requirements for model implementation in ["Transformers modeling backend"](./transformers_as_backend#multimodal-models)
+- You can make the model compatible with inference engines such as vLLM or SGLang, and enable zero-effort integration. See specific requirements for model implementation in ["Transformers modeling backend"](./docs/source/en/transformers_as_backend.md#multimodal-models)

 To verify your modular file is correct, run:
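For context on what this checklist asks for: a modular file is typically a thin subclassing layer that the converter expands into full modeling code. A minimal hypothetical sketch, assuming Llama as the donor architecture (the model name is invented for illustration):

```python
# modular_mymodel.py -- hypothetical minimal modular file, assuming Llama as the base.
# The modular converter expands this into a full modeling_mymodel.py.
from transformers.models.llama.configuration_llama import LlamaConfig
from transformers.models.llama.modeling_llama import LlamaForCausalLM, LlamaModel


class MyModelConfig(LlamaConfig):
    model_type = "mymodel"


class MyModelModel(LlamaModel):
    pass


class MyModelForCausalLM(LlamaForCausalLM):
    pass
```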
MIGRATION_GUIDE_V5.md

Lines changed: 485 additions & 0 deletions

Large diffs are not rendered by default.

README.md

Lines changed: 1 addition & 1 deletion

@@ -134,7 +134,7 @@ pipeline("the secret to baking a really good cake is ")
 To chat with a model, the usage pattern is the same. The only difference is you need to construct a chat history (the input to `Pipeline`) between you and the system.

 > [!TIP]
-> You can also chat with a model directly from the command line.
+> You can also chat with a model directly from the command line, as long as [`transformers serve` is running](https://huggingface.co/docs/transformers/main/en/serving).
 > ```shell
 > transformers chat Qwen/Qwen2.5-0.5B-Instruct
 > ```
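The chat-history input mentioned in the context line above can also be driven from the regular `pipeline` API; a minimal sketch reusing the checkpoint from the tip (the prompt text is invented for illustration):

```python
from transformers import pipeline

# Text-generation pipelines accept a chat history (a list of role/content messages) directly.
chat = pipeline("text-generation", model="Qwen/Qwen2.5-0.5B-Instruct")

history = [
    {"role": "system", "content": "You are a concise assistant."},
    {"role": "user", "content": "What is the secret to baking a really good cake?"},
]

result = chat(history, max_new_tokens=64)
# For chat inputs, generated_text holds the full conversation, ending with the assistant's reply.
print(result[0]["generated_text"][-1]["content"])
```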

benchmark_v2/framework/benchmark_config.py

Lines changed: 37 additions & 37 deletions

@@ -2,9 +2,10 @@
 import itertools
 import json
 import logging
+from functools import lru_cache
 from typing import Any

-from transformers.utils.import_utils import is_flash_attn_2_available
+from transformers.utils.import_utils import is_flash_attn_2_available, is_kernels_available


 KERNELIZATION_AVAILABLE = False
@@ -18,17 +19,36 @@
 logger = logging.getLogger(__name__)


+@lru_cache
+def is_fa2_or_kernel_available() -> bool:
+    """Returns True if the flash_attn_2 or a fallback kernel is available"""
+    # Early return if flash_attn_2 is available
+    if is_flash_attn_2_available():
+        return True
+    # Early return if kernels is not available
+    if not is_kernels_available():
+        logger.warning(
+            "flash_attention_2 is not available. kernels is not installed. Benchmarking flash_attention_2 will not "
+            "be possible."
+        )
+        return False
+    # If kernels is available, try to get the flash_attn_2 kernel
+    try:
+        from kernels import get_kernel
+
+        get_kernel("kernels-community/flash-attn")
+    except Exception as _:
+        logger.warning(
+            "flash_attention_2 is not available. kernels is installed, but the flash_attn kernel is not available."
+            "Benchmarking flash_attention_2 will not be possible."
+        )
+        return False
+
+
 class BenchmarkConfig:
     """Configuration for a single benchmark scenario."""

-    all_attn_implementations = [
-        ("flash_attention_2", None),
-        ("eager", None),
-        ("sdpa", "math"),
-        ("sdpa", "flash_attention"),
-        ("flex_attention", None),
-    ]
-
+    all_attn_implementations = ["flash_attention_2", "eager", "sdpa", "flex_attention"]
     all_compiled_modes = [None, "default", "reduce-overhead", "max-autotune", "max-autotune-no-cudagraphs"]

     def __init__(
@@ -41,7 +61,6 @@ def __init__(
         sequence_length: int = 128,
         num_tokens_to_generate: int = 128,
         attn_implementation: str = "eager",
-        sdpa_backend: str | None = None,
         compile_mode: str | None = None,
         compile_options: dict[str, Any] | None = None,
         kernelize: bool = False,
@@ -59,7 +78,6 @@ def __init__(
         self.num_tokens_to_generate = num_tokens_to_generate
         # Generation parameters
         self.attn_implementation = attn_implementation
-        self.sdpa_backend = sdpa_backend
         # Optimization parameters
         self.compile_mode = compile_mode
         self.compile_options = compile_options if compile_options is not None else {}
@@ -75,34 +93,21 @@ def check_validity(self, skip_validity_check: bool = False) -> None:
         if skip_validity_check:
             return
         # Check FA is installed
-        if self.attn_implementation == "flash_attention_2" and not is_flash_attn_2_available():
-            logger.warning(
-                "Flash attention does not support compile mode. Defaulting to SDPA w/ flash attention backend."
-            )
+        is_fa = self.attn_implementation == "flash_attention_2"
+        if is_fa and not is_fa2_or_kernel_available():
+            logger.warning("Flash attention is not available. Defaulting to SDPA.")
             self.attn_implementation = "sdpa"
-            self.sdpa_backend = "flash_attention"
         # Flash attention does not support compile mode, so we turn it off  # FIXME: it would be better to support it
-        is_fa = self.attn_implementation == "flash_attention_2"
-        is_fa |= self.attn_implementation == "sdpa" and self.sdpa_backend == "flash_attention"
-        if is_fa:
+        if is_fa and self.compile_mode is not None:
             logger.warning("Flash attention does not support compile mode. Turning off compile mode.")
             self.compile_mode = None
-        # Handle SDPA backend if not determined by the config (needs to be done before skipping duplicates)
-        if self.attn_implementation == "sdpa" and self.sdpa_backend is None:
-            default_backend = "flash_attention"  # FIXME: torch has a _cur_sdpa_kernel_backends but it fails
-            logger.warning(f"No SDPA backend provided, using {default_backend} instead.")
-            self.sdpa_backend = default_backend
+        # Handle continuous batching cases
         if self.continuous_batching:
             if self.attn_implementation == "flex_attention":
                 logger.error(
-                    "disabling continuous batching because of invalid configuration: flex attention is not supported"
+                    "Disabling continuous batching because of invalid configuration: flex attention is not supported."
                 )
                 self.continuous_batching = False
-            elif self.attn_implementation == "sdpa" and self.sdpa_backend is not None:
-                logger.warning(
-                    "when continuous batching is enabled, sdpa_backend must be None because of the attention mask, setting it to None"
-                )
-                self.sdpa_backend = "math"

     @property
     def hash(self) -> str:
@@ -115,7 +120,6 @@ def infer_name(self, compact: bool = True) -> str:
             gpu_monitor_str = "monitored" if self.gpu_monitoring else "unmonitored"
             dimensions_str = f"b{self.batch_size}_s{self.sequence_length}_n{self.num_tokens_to_generate}"
             attn_code = self.attn_implementation
-            attn_code += f"_{self.sdpa_backend}" if self.attn_implementation == "sdpa" else ""
             compile_str = f"compiled_{self.compile_mode}" if self.compile_mode is not None else "uncompiled"
             kernelize_str = "kernelized" if self.kernelize else "unkernelized"
             continuous_batching_str = "cb" if self.continuous_batching else "generate"
@@ -125,7 +129,6 @@ def infer_name(self, compact: bool = True) -> str:
             gpu_monitor_str = ("with" if self.gpu_monitoring else "no") + " GPU monitoring"
             dimensions_str = f"batch size {self.batch_size}, sequence length {self.sequence_length}, {self.num_tokens_to_generate} generated tokens"
             attn_code = f"{self.attn_implementation} attention"
-            attn_code += f" with {self.sdpa_backend} backend" if self.attn_implementation == "sdpa" else ""
             compile_str = "compiled" if self.compile_mode is not None else "not compiled"
             kernelize_str = "kernelized" if self.kernelize else "not kernelized"
             continuous_batching_str = "continuous batching" if self.continuous_batching else "regular generate"
@@ -145,7 +148,6 @@ def to_dict(self) -> dict[str, Any]:
             "sequence_length": self.sequence_length,
             "num_tokens_to_generate": self.num_tokens_to_generate,
             "attn_implementation": self.attn_implementation,
-            "sdpa_backend": self.sdpa_backend,
             "compile_mode": self.compile_mode,
             "compile_options": self.compile_options | {},  # to avoid inplace modification of the original dict
             "kernelize": self.kernelize,
@@ -162,7 +164,6 @@ def from_dict(cls, data: dict[str, Any], skip_validity_check: bool = False) -> "BenchmarkConfig":
             sequence_length=data.get("sequence_length", 128),
             num_tokens_to_generate=data.get("num_tokens_to_generate", 128),
             attn_implementation=data.get("attn_implementation", "eager"),
-            sdpa_backend=data.get("sdpa_backend"),
             compile_mode=data.get("compile_mode"),
             compile_options=data.get("compile_options"),
             kernelize=data.get("kernelize", False),
@@ -213,7 +214,7 @@ def get_config_by_level(level: int) -> list[BenchmarkConfig]:
     configs = []
     # Early return if level is greater than 3: we generate all combinations of configs, maybe even w/ all compile modes
     if level >= 3:
-        for attn_implementation, sdpa_backend in BenchmarkConfig.all_attn_implementations:
+        for attn_implementation in BenchmarkConfig.all_attn_implementations:
             # Usually there is not much to gain by compiling with other modes, but we allow it for level 4
             compile_modes = BenchmarkConfig.all_compiled_modes if level >= 4 else [None, "default"]
             for cm in compile_modes:
@@ -222,7 +223,6 @@ def get_config_by_level(level: int) -> list[BenchmarkConfig]:
                 configs.append(
                     BenchmarkConfig(
                         attn_implementation=attn_implementation,
-                        sdpa_backend=sdpa_backend,
                         compile_mode=cm,
                         kernelize=kernelize_on,
                         continuous_batching=cb_on,
@@ -240,5 +240,5 @@ def get_config_by_level(level: int) -> list[BenchmarkConfig]:
     configs.append(BenchmarkConfig(attn_implementation="sdpa", compile_mode="default"))
     configs.append(BenchmarkConfig(attn_implementation="flex_attention", compile_mode="default", kernelize=True))
     configs.append(BenchmarkConfig(attn_implementation="flash_attention_2", kernelize=True))
-    configs.append(BenchmarkConfig(attn_implementation="paged|sdpa", continuous_batching=True))
+    configs.append(BenchmarkConfig(attn_implementation="sdpa", continuous_batching=True))
     return configs
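A short usage sketch of the refactored config, based only on the names visible in the diff above; it assumes the benchmark_v2 package is importable from the repository root, and exact constructor defaults may differ:

```python
# Usage sketch for the refactored BenchmarkConfig; all names come from the diff above.
from benchmark_v2.framework.benchmark_config import (
    BenchmarkConfig,
    get_config_by_level,
    is_fa2_or_kernel_available,
)

# The @lru_cache-decorated probe runs at most once per process, so repeated checks stay cheap.
print(is_fa2_or_kernel_available())

# sdpa_backend is gone: the attention implementation string alone identifies the variant.
cfg = BenchmarkConfig(attn_implementation="flash_attention_2", compile_mode="default")
cfg.check_validity()  # may downgrade to sdpa and/or drop compile_mode, per the logic above
print(cfg.attn_implementation, cfg.compile_mode)

# Level-based presets now iterate over plain strings instead of (implementation, backend) tuples.
for config in get_config_by_level(3):
    print(config.infer_name(compact=True))
```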
