o
    i`                     @   s   d dl Zd dlmZ d dlmZ ddlmZ erddlm	Z	 ddl
mZmZmZmZ dd	lmZ e r7d dlZeeZG d
d deZdS )    N)TYPE_CHECKING)version   )HfQuantizer   )PreTrainedModel)is_accelerate_availableis_gptqmodel_availableis_torch_availablelogging)
AwqBackendc                       sZ   e Zd ZdZdZ fddZdd Zdd ZdddZdd Z	dd Z
edd Z  ZS )AwqQuantizerzu
    4-bit quantization for Activation-aware Weight Quantization(AWQ) (https://huggingface.co/papers/2306.00978)
    Tc                    s   t  j|fi | d S )N)super__init__)selfquantization_configkwargs	__class__ g/sda-disk/www/egybert/egybert_env/lib/python3.10/site-packages/transformers/quantizers/quantizer_awq.pyr   +   s   zAwqQuantizer.__init__c                 K   s    t  stdt stdd S )NzaLoading an AWQ quantized model requires gptqmodel. Please install it with `pip install gptqmodel`zMLoading an AWQ quantized model requires accelerate (`pip install accelerate`))r	   ImportErrorr   )r   r   r   r   r   validate_environment.   s   z!AwqQuantizer.validate_environmentc                 C   s^   |t jkrt j st j rtd t j}|S |t jkr-t j s(t j r-td |S )Nz[`torch.bfloat16` is not supported for AWQ CUDA/XPU kernels yet. Casting to `torch.float16`.zWWe suggest you to set `dtype=torch.float16` for better efficiency on CUDA/XPU with AWQ.)torchbfloat16cudais_availablexpuloggerwarningfloat16)r   dtyper   r   r   update_dtype7   s   
zAwqQuantizer.update_dtypemodelr   c                 K   sV   ddl m}m} | j|| jj|jdd| _||| j| j|dd}|||jj	}d S )Nr   )replace_quantization_scalesreplace_with_awq_linearT)add_default_skips
device_map)r   modules_to_not_convertr'   )
integrationsr$   r%   get_modules_to_not_convertr   r(   _keep_in_fp32_modulesgetconfig
model_type)r   r#   r   r$   r%   r   r   r   $_process_model_before_weight_loadingA   s   z1AwqQuantizer._process_model_before_weight_loadingc                 K   s    ddl m} ||| jjd d S )Nr   )hf_gptqmodel_post_init)use_act_order)gptqmodel.utils.modelr0   r   desc_act)r   r#   r   r0   r   r   r   #_process_model_after_weight_loadingQ   s   z0AwqQuantizer._process_model_after_weight_loadingc                 C   s&   | j jtjtjfv rtd dS dS )Nz7You cannot save an AWQ model that uses Exllama backend!FT)r   backendr   
EXLLAMA_V1
EXLLAMA_V2r   r   r   r   r   r   is_serializableV   s   
zAwqQuantizer.is_serializablec                 C   s   t tj dt dkS )N	gptqmodelz5.0.0)r   parse	importlibmetadatar8   r   r   r   is_trainable]   s   zAwqQuantizer.is_trainable)r#   r   )__name__
__module____qualname____doc__requires_calibrationr   r   r"   r/   r4   r9   propertyr>   __classcell__r   r   r   r   r   #   s    	

r   )importlib.metadatar<   typingr   	packagingr   baser   modeling_utilsr   utilsr   r	   r
   r   utils.quantization_configr   r   
get_loggerr?   r   r   r   r   r   r   <module>   s   
