o
    iM6                     @   s  d dl Z ddlmZ ddlmZ ddlmZmZmZm	Z	m
Z
mZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZ ddlmZ ddlmZ dd	l m!Z! dd
l"m#Z# ddl$m%Z% ddl&m'Z' ddl(m)Z) ddl*m+Z+ ddl,m-Z- ddl.m/Z/ ddl0m1Z1 ddl2m3Z3 ddl4m5Z5 ddl6m7Z7 ddl8m9Z9 ddl:m;Z; ddl<m=Z= ddl>m?Z? ddl@mAZA ddlBmCZC ddlDmEZE ddlFmGZG i de#de'de)d e5d!ed"e=d#e?d$e3d%e-d&e7d'e9d(e+d)e/d*eEd+e%d,eGd-eCe1e!e;eAd.ZHi dede
de
d%ed ed!ed"ed#ed$ed'ed(ed)ed&ed*ed+e	d,ed-eeeeed.ZIeJeKZLG d/d0 d0ZMG d1d2 d2ZNd3eOfd4d5ZPd6eOfd7d8ZQd9d: ZRdS );    N   )
AutoConfig)logging)
AqlmConfigAutoRoundConfig	AwqConfigBitNetQuantConfigBitsAndBytesConfigCompressedTensorsConfig
EetqConfigFbgemmFp8ConfigFineGrainedFP8ConfigFPQuantConfig
GPTQConfigHiggsConfig	HqqConfigMxfp4ConfigQuantizationConfigMixinQuantizationMethodQuantoConfigQuarkConfig
SinqConfig
SpQRConfigTorchAoConfig
VptqConfig   )HfQuantizer)AqlmHfQuantizer)AutoRoundQuantizer)AwqQuantizer)BitNetHfQuantizer)Bnb4BitHfQuantizer)Bnb8BitHfQuantizer)CompressedTensorsHfQuantizer)EetqHfQuantizer)FbgemmFp8HfQuantizer)FineGrainedFP8HfQuantizer)FPQuantHfQuantizer)GptqHfQuantizer)HiggsHfQuantizer)HqqHfQuantizer)Mxfp4HfQuantizer)QuantoHfQuantizer)QuarkHfQuantizer)SinqHfQuantizer)SpQRHfQuantizer)TorchAoHfQuantizer)VptqHfQuantizerawqbitsandbytes_4bitbitsandbytes_8bitgptqaqlmquantoquarkfp_quanteetqhiggshqqzcompressed-tensors
fbgemm_fp8torchaobitnetvptqspqr)fp8z
auto-roundmxfp4sinqc                   @   s.   e Zd ZdZedefddZedd ZdS )AutoQuantizationConfigz
    The Auto-HF quantization config class that takes care of automatically dispatching to the correct
    quantization config given a quantization config stored in a dictionary.
    quantization_config_dictc                 C   s   | d}| dds| ddr!| ddrdnd}tj| }n|d u r)td|tvr;td| d	tt  t| }||S )
Nquant_methodload_in_8bitFload_in_4bit_4bit_8bitThe model's quantization config from the arguments has no `quant_method` attribute. Make sure that the model has been correctly quantizedUnknown quantization type, got  - supported types are: )	getr   BITS_AND_BYTES
ValueError AUTO_QUANTIZATION_CONFIG_MAPPINGlistAUTO_QUANTIZER_MAPPINGkeys	from_dict)clsrF   rG   suffix
target_cls rZ   ^/sda-disk/www/egybert/egybert_env/lib/python3.10/site-packages/transformers/quantizers/auto.pyrV   |   s    


z AutoQuantizationConfig.from_dictc                 K   sV   t j|fi |}t|dd d u rtd| d|j}| |}|jdi | |S )Nquantization_configz)Did not found a `quantization_config` in z2. Make sure that the model is correctly quantized.rZ   )r   from_pretrainedgetattrrQ   r\   rV   update)rW   pretrained_model_name_or_pathkwargsmodel_configrF   r\   rZ   rZ   r[   r]      s   

z&AutoQuantizationConfig.from_pretrainedN)__name__
__module____qualname____doc__classmethoddictrV   r]   rZ   rZ   rZ   r[   rE   v   s    rE   c                   @   s\   e Zd ZdZedeeB fddZedd ZedeeB dedB fd	d
Z	e
dd ZdS )AutoHfQuantizerz
     The Auto-HF quantizer class that takes care of automatically instantiating to the correct
    `HfQuantizer` given the `QuantizationConfig`.
    r\   c                 K   sx   t |tr
t|}|j}|tjkr|jr|d7 }n|d7 }|tvr0t	d| dt
t  t| }||fi |S )NrK   rJ   rM   rN   )
isinstancerh   rE   rV   rG   r   rP   rH   rT   rQ   rS   rU   )rW   r\   ra   rG   rY   rZ   rZ   r[   from_config   s   




zAutoHfQuantizer.from_configc                 K   s   t j|fi |}| |S )N)rE   r]   rk   )rW   r`   ra   r\   rZ   rZ   r[   r]      s   
zAutoHfQuantizer.from_pretrainedquantization_config_from_argsNc              	   C   s  |durd}nd}t |trt |trt|}nt|}|dur9|jj|jjkr9td|jj d|jj dt |tt	tt
tttfrh|durh| }| D ]
\}}t||| qQ|dt|  d7 }|dkrzt |ttfszt| |S t| |S )	z
        handles situations where both quantization_config from args and quantization_config from model config are present.
        NzYou passed `quantization_config` or equivalent parameters to `from_pretrained` but the model you're loading already has a `quantization_config` attribute. The `quantization_config` from the model will be used. zThe model is quantized with z but you are passing a z| config. Please make sure to pass the same quantization config class to `from_pretrained` with different loading attributes.z"However, loading attributes (e.g. z]) will be overwritten with the one you passed to `from_pretrained`. The rest will be ignored.)rj   rh   r   rV   rE   	__class__rc   rQ   r   r   r   r
   r   r   get_loading_attributesitemssetattrrS   rU   warningswarnloggerinfo)rW   r\   rl   warning_msgloading_attr_dictattrvalrZ   rZ   r[   merge_quantization_configs   sF   	




z*AutoHfQuantizer.merge_quantization_configsc                 C   s   |  dd }|  dds|  ddr"|  ddrdnd}tj| }n|d u r*td|tvr@td| d	tt	  d
 dS dS )NrG   rH   FrI   rJ   rK   rL   rM   rN   z~. Hence, we will skip the quantization. To remove the warning, you can delete the quantization_config attribute in config.jsonT)
rO   r   rP   rQ   rR   rt   warningrS   rT   rU   )rF   rG   rX   rZ   rZ   r[   supports_quant_method   s    
z%AutoHfQuantizer.supports_quant_method)rc   rd   re   rf   rg   r   rh   rk   r]   rz   staticmethodr|   rZ   rZ   rZ   r[   ri      s    
<ri   methodc                        fdd}|S )z-Register a custom quantization configuration.c                    6    t v rtd  dt| tstd| t  < | S )NzConfig '' already registeredz*Config must extend QuantizationConfigMixin)rR   rQ   
issubclassr   	TypeErrorrW   r~   rZ   r[   register_config_fn     
z8register_quantization_config.<locals>.register_config_fnrZ   )r~   r   rZ   r   r[   register_quantization_config     
r   namec                    r   )zRegister a custom quantizer.c                    r   )NzQuantizer 'r   z!Quantizer must extend HfQuantizer)rT   rQ   r   r   r   r   r   rZ   r[   register_quantizer_fn(  r   z1register_quantizer.<locals>.register_quantizer_fnrZ   )r   r   rZ   r   r[   register_quantizer%  r   r   c                 C   s   t | d}|rt| jsd}|s|d ur,|r t| j|| _n|| _tj| j|d}nd }|d ur[|j||d ||}|| } |	| } t
|jdds[|jj}t
|d||d< || |fS )Nr\   F)pre_quantized)
device_mapweights_only
dequantizevaluequant)hasattrri   r|   r\   rz   rk   validate_environmentupdate_device_mapupdate_tp_planupdate_ep_planr^   rG   )configr\   r   r   
user_agentr   hf_quantizerrG   rZ   rZ   r[   get_hf_quantizer5  s4   




r   )Srr   models.auto.configuration_autor   utilsr   utils.quantization_configr   r   r   r   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   baser   quantizer_aqlmr   quantizer_auto_roundr   quantizer_awqr   quantizer_bitnetr    quantizer_bnb_4bitr!   quantizer_bnb_8bitr"   quantizer_compressed_tensorsr#   quantizer_eetqr$   quantizer_fbgemm_fp8r%   quantizer_finegrained_fp8r&   quantizer_fp_quantr'   quantizer_gptqr(   quantizer_higgsr)   quantizer_hqqr*   quantizer_mxfp4r+   quantizer_quantor,   quantizer_quarkr-   quantizer_sinqr.   quantizer_spqrr/   quantizer_torchaor0   quantizer_vptqr1   rT   rR   
get_loggerrc   rt   rE   ri   strr   r   r   rZ   rZ   rZ   r[   <module>   s   `	
	

)v