o
    ifK                  
   @  s(  d dl mZ d dlmZ d dlmZ ddlmZmZm	Z	m
Z
mZmZmZmZ er4ddlmZ ddlmZ i ddd	dd
ddddddddddddddddddddddddddddddddddddddZdd Zdadd Z	 d6d7d)d*Zg d+Z			,d8d9d4d5ZdS ):    )annotations)deepcopy)TYPE_CHECKING   )ChunkConcatenate"ErnieFuseAndSplitTextVisionExpertsForce16BytesAlignmentMergeModulelist	TransposeWeightConverterWeightRenaming)PreTrainedModel)HfQuantizermixtralminimax
minimax_m2	qwen2_moedeepseek_v2deepseek_v3dots1ernie4_5_moeglm4_moeglm4_moe_liteglm_moe_dsa	glm4v_moelongcat_flash
solar_open	qwen3_moeqwen3_omni_moeqwen3_omni_moe_thinkerrt_detr)
qwen3_nextqwen3_5_moehunyuan_v1_moe	flex_olmoolmoe
exaone_moe
rt_detr_v2pp_doclayout_v3c                  C  s  i dt dddgdt ddt dd	t d
dgdt ddt ddt ddgdtddt gdtddt gdgdt ddtddgdtdd td!d gdtd"gd#tdd gdgd$td%d&gdtdd td!d gdtd'dtdd gdgd(tddtd!d)d*d+t gdtddtd!d)d*d+t gdgd,t ddt d-d.tddgdtdd td!d gdtd"d#tdd gdgd/td0d1gd2tdd td!d gdtd3d4tdd gdgd5t d6d7t d8d9t d:d;t d<d=t d>d?t d@dAt dBdCt dDdEt dFdGtdHdItdd!dJgdtdKdLtdd!dJgdtdMgdNdOgtdd gdtdPgdQdRgtdd!dSgdtdTdUgdVdWgtdd!dSgdgdXt dYdZt d[d\t d]d^t d_d`gdat d[d\t d]d^t d_d`t dbdcgddt dYdZt dedft dgdht d]d^t d_d`t didjt dkdlt dmdnt dodpt dqdrt dsdtt dudvt dwdxt dydzt d{d|t d}d~gdt dYdZt dedft d]d^t d_d`gdt d[d\t d]dt d_dt dbdcgdtddgd2tdd td!d gdtdd4tdd gdgdt dddgdt dddt dddgi} | d  t dddt dddg7  < | d$  | d< | d  t dMdg7  < | d  | d< | d  t ddg7  < | d$  | d< | d  t ddg7  < | d  | d< | d  | d$  7  < t	
 D ]\}}|| v ruqj| |  | |< qj| S )Nqwen3_5_textz^model.language_modelmodelsource_patternstarget_patternst5gemma2z((?<!vision_model\.)encoder.embed_tokens.z encoder.text_model.embed_tokens.z (?<!vision_model\.)encoder.norm.zencoder.text_model.norm.z"(?<!vision_model\.)encoder.layers.zencoder.text_model.layers.t5gemma2_encoderz^embed_tokens.ztext_model.embed_tokens.z^norm.ztext_model.norm.z^layers.ztext_model.layers.gpt_osszmlp.experts.gate_up_proj$zmlp.experts.gate_up_proj)r-   r.   
operationszmlp.experts.down_proj$zmlp.experts.down_projr   z.block_sparse_moe.z.mlp.z.experts.*.w1.weightz.experts.*.w3.weightz.experts.gate_up_projr   )dimr   z.experts.*.w2.weightz.experts.down_projr   zmlp.experts.*.gate_proj.weightzmlp.experts.*.up_proj.weightzmlp.experts.*.down_proj.weightqwen3_vl_moe   T)
check_dimsphimoez.gate.weightz.router.weightlfm2_moez feed_forward.experts.*.w1.weightz feed_forward.experts.*.w3.weightz!feed_forward.experts.gate_up_projz feed_forward.experts.*.w2.weightzfeed_forward.experts.down_projernie4_5_vl_moevision_modelvision_towerzspatial_linear.0zspatial_linear.fc1zspatial_linear.2zspatial_linear.fc2zspatial_linear.3zspatial_linear.lnztemporal_linear.0ztemporal_linear.fc1ztemporal_linear.2ztemporal_linear.fc2ztemporal_linear.3ztemporal_linear.lnz!(?<!language_model\.)embed_tokenszlanguage_model.embed_tokensz(?<!language_model\.)layerszlanguage_model.layerszmlp.gate.weight_1zmlp.vision_moe.gate.weight)dim0dim1zmlp.gate.weightzmlp.text_moe.gate.weightz'mlp.moe_statics.e_score_correction_biasz5mlp.text_moe.gate.moe_statics.e_score_correction_biasz7mlp.vision_moe.gate.moe_statics.e_score_correction_biaszexperts.*.down_proj.weightztext_moe.experts.down_projzvision_moe.experts.down_proj)	stack_dim
concat_dimzexperts.*.gate_proj.weightzexperts.*.up_proj.weightztext_moe.experts.gate_up_projzvision_moe.experts.gate_up_projdetrzbackbone.conv_encoderbackboneout_projo_projzlayers.(\d+).fc1zlayers.\1.mlp.fc1zlayers.(\d+).fc2zlayers.\1.mlp.fc2r!   zencoder.encoder.(\d+).layerszencoder.aifi.\1.layersconditional_detrzself_attn.out_projzself_attn.o_projzencoder_attn.out_projzencoder_attn.o_projz%decoder.layers.(\d+).sa_qcontent_projz*decoder.layers.\1.self_attn.q_content_projz!decoder.layers.(\d+).sa_qpos_projz&decoder.layers.\1.self_attn.q_pos_projz%decoder.layers.(\d+).sa_kcontent_projz*decoder.layers.\1.self_attn.k_content_projz!decoder.layers.(\d+).sa_kpos_projz&decoder.layers.\1.self_attn.k_pos_projzdecoder.layers.(\d+).sa_v_projz"decoder.layers.\1.self_attn.v_projz%decoder.layers.(\d+).ca_qcontent_projz-decoder.layers.\1.encoder_attn.q_content_projz!decoder.layers.(\d+).ca_qpos_projz)decoder.layers.\1.encoder_attn.q_pos_projz%decoder.layers.(\d+).ca_kcontent_projz-decoder.layers.\1.encoder_attn.k_content_projz!decoder.layers.(\d+).ca_kpos_projz)decoder.layers.\1.encoder_attn.k_pos_projzdecoder.layers.(\d+).ca_v_projz%decoder.layers.\1.encoder_attn.v_projz&decoder.layers.(\d+).ca_qpos_sine_projz.decoder.layers.\1.encoder_attn.q_pos_sine_projdeformable_detrd_finezlayers.\1.mlp.layers.0zlayers.\1.mlp.layers.1jambaz'feed_forward.experts.*.gate_proj.weightz%feed_forward.experts.*.up_proj.weightz'feed_forward.experts.*.down_proj.weighttimm_wrapperz(.+)ztimm_model.\1legacyzLayerNorm.gammazLayerNorm.weightzLayerNorm.betazLayerNorm.biasz
.weight_g$z".parametrizations.weight.original0z
.weight_v$z".parametrizations.weight.original1r   z,mlp.gate.moe_statics.e_score_correction_biasr   z).block_sparse_moe.e_score_correction_biasz.mlp.e_score_correction_biasr'   zmlp.e_score_correction_biasz mlp.gate.e_score_correction_biasqwen3_5_moe_text)r   r   r	   r
   r   r   r   r   copy_MODEL_TO_CONVERSION_PATTERNitems)mapping
model_typebase_pattern rQ   a/sda-disk/www/egybert/egybert_env/lib/python3.10/site-packages/transformers/conversion_mapping.py$_build_checkpoint_conversion_mappingD   s:  
	:
IU
f
u
 )  /  5  K  Q  W
  f  n  y
rS   Nc                 C  s   t d u rt a tt | S N)$_checkpoint_conversion_mapping_cacherS   r   get)rO   rQ   rQ   rR   !get_checkpoint_conversion_mappinga  s   rW   FrO   strrN   &list[WeightConverter | WeightRenaming]	overwriteboolreturnNonec                 C  s6   t d u rt a | t v r|std|  d|t | < d S )NzModel type z5 already exists in the checkpoint conversion mapping.)rU   rS   
ValueError)rO   rN   rZ   rQ   rQ   rR   &register_checkpoint_conversion_mappingh  s
   r_   )aria	ayavisioncolpaliemu3fuyugotocr2gemma3internvlllavamistral3mllama	paligemmashieldgemma2qwen2vl
qwen2_5_vl
videollavavipllava
sam3_videosam3sam3_trackersam3_tracker_videopaddleocrvlr9   r@   Tr+   r   key_mappingdict[str, str] | Nonehf_quantizerHfQuantizer | None
add_legacyc                 C  s   g }|durdd |  D }ntdd | jjdd D r)dd | j  D }t| jdd}|durAt|}|durA|| |rJ|td	 |durU||	  |S )
z
    For a given `model`, obtain the weight conversion mapping if any are registered either as a simple renaming
    `_checkpoint_conversion_mapping` class argument, or in the general WeightConverter mapping.
    Nc                 S     g | ]
\}}t ||d qS r,   r   .0kvrQ   rQ   rR   
<listcomp>  s    z0get_model_conversion_mapping.<locals>.<listcomp>c                 s  s(    | ]}t D ]
}||j v V  qqd S rT   )VLMS__name__lower)r   
class_nameallowed_namerQ   rQ   rR   	<genexpr>  s    z/get_model_conversion_mapping.<locals>.<genexpr>c                 S  r{   r|   r}   r~   rQ   rQ   rR   r     s    
rO   rI   )
rM   any	__class____mro___checkpoint_conversion_mappinggetattrconfigrW   extendget_weight_conversions)r+   rv   rx   rz   weight_conversionsrO   model_specific_conversionsrQ   rQ   rR   get_model_conversion_mapping  s&   

r   )F)rO   rX   rN   rY   rZ   r[   r\   r]   )NNT)
r+   r   rv   rw   rx   ry   rz   r[   r\   rY   )
__future__r   rK   r   typingr   core_model_loadingr   r   r   r	   r
   r   r   r   modeling_utilsr   
quantizersr   rL   rS   rU   rW   r_   r   r   rQ   rQ   rQ   rR   <module>   s~   (	
  