o
    i                     @   s   d dl Z d dlmZ d dlmZ ddlmZ erddlmZ ddl	m
Z
mZmZmZ dd	lmZmZ e r9d dlZeeZG d
d deZdS )    N)TYPE_CHECKING)version   )HfQuantizer   )PreTrainedModel)is_gptqmodel_availableis_optimum_availableis_torch_availablelogging)
GPTQConfigQuantizationConfigMixinc                       sr   e Zd ZdZdZdef fddZdd ZdddZdd Z	dddZ
dddZed
efddZdd Z  ZS )GptqHfQuantizerz
    Quantizer of the GPTQ method - for GPTQ the quantizer support calibration of the model through
    the GPT-QModel package (Python import name `gptqmodel`). Quantization is done under the hood for users if they
    load a non-prequantized model.
    Fquantization_configc                    sD   t  j|fi | t stdddlm} || j | _	d S )NGLoading a GPTQ quantized model requires optimum (`pip install optimum`)r   )GPTQQuantizer)
super__init__r	   ImportErroroptimum.gptqr   	from_dictr   to_dict_optimumoptimum_quantizer)selfr   kwargsr   	__class__ h/sda-disk/www/egybert/egybert_env/lib/python3.10/site-packages/transformers/quantizers/quantizer_gptq.pyr   ,   s
   zGptqHfQuantizer.__init__c                 O   s   t  stdt }|stj stdt stdt r?tt	j
dtdk s;tt	j
dtdk rAtdd S d S )	Nr   z2GPU is required to quantize or run quantize model.zTLoading a GPTQ quantized model requires gptqmodel (`pip install gptqmodel`) library.	gptqmodelz1.4.3optimum1.23.99zJThe gptqmodel version should be >= 1.4.3, optimum version should >= 1.24.0)r	   r   r   torchcudais_availableRuntimeErrorr   parse	importlibmetadata)r   argsr   gptq_supports_cpur   r   r   validate_environment5   s   z$GptqHfQuantizer.validate_environmentdtypetorch.dtypereturnc                 C   s   |t jkr
td |S )NzLWe suggest you to set `dtype=torch.float16` for better efficiency with GPTQ.)r"   float16loggerinfo)r   r,   r   r   r   update_dtypeD   s   

zGptqHfQuantizer.update_dtypec                 C   s   |d u rdt di}|S )N cpu)r"   device)r   
device_mapr   r   r   update_device_mapI   s   z!GptqHfQuantizer.update_device_mapmodelr   c                 K   sb   |j jdkr
td| jr/ttjdtdkr#| j	|}d S | jj	|fi |}d S d S )N	input_idsz%We can only quantize pure text model.r    r!   )
r   main_input_namer%   pre_quantizedr   r&   r'   r(   r   convert_modelr   r8   r   r   r   r   $_process_model_before_weight_loadingN   s   z4GptqHfQuantizer._process_model_before_weight_loadingc                 K   sV   | j r| j|}d S | jjd u r|j| j_| j|| jj t| j	 |j
_d S )N)r;   r   post_init_modelr   	tokenizername_or_pathquantize_modelr   r   to_dictconfigr=   r   r   r   #_process_model_after_weight_loadingY   s   
z3GptqHfQuantizer._process_model_after_weight_loadingc                 C      dS NTr   r   r   r   r   is_trainablec   s   zGptqHfQuantizer.is_trainablec                 C   rF   rG   r   rH   r   r   r   is_serializableg   s   zGptqHfQuantizer.is_serializable)r,   r-   r.   r-   )r8   r   )__name__
__module____qualname____doc__requires_calibrationr   r   r+   r2   r7   r>   rE   propertyboolrI   rJ   __classcell__r   r   r   r   r   #   s    	



r   )r'   typingr   	packagingr   baser   modeling_utilsr   utilsr   r	   r
   r   utils.quantization_configr   r   r"   
get_loggerrK   r0   r   r   r   r   r   <module>   s   
