o
    i                     @   sx   d dl mZ ddlmZ erddlmZ ddlmZmZm	Z	m
Z
 ddlmZ e	 r-d dlZe
eZG d	d
 d
eZdS )    )TYPE_CHECKING   )HfQuantizer   )PreTrainedModel)is_accelerate_availableis_kernels_availableis_torch_availablelogging)get_module_from_nameNc                       s|   e Zd ZdZdZ fddZdd Zdd
dZddded	e	fddZ
		dddZdd Zed	e	fddZdd Z  ZS )EetqHfQuantizerz:
    8-bit quantization from EETQ quantization method
    Fc                    s   t  j|fi | d S )N)super__init__)selfquantization_configkwargs	__class__ h/sda-disk/www/egybert/egybert_env/lib/python3.10/site-packages/transformers/quantizers/quantizer_eetq.pyr   (   s   zEetqHfQuantizer.__init__c                 O   s   t  stdt stdtj std|d}|d u r't	d d S t
|trBt|dkr8d| v s>d| v rDtd	d S d S )
NzHLoading an EETQ quantized model requires kernels (`pip install kernels`)zNLoading an EETQ quantized model requires accelerate (`pip install accelerate`)z/No GPU found. A GPU is needed for quantization.
device_mapzYou have loaded an EETQ model on CPU and have a CUDA device available, make sure to set your model on a GPU device in order to run your model.r   cpudiskzYou are attempting to load an EETQ model with a device_map that contains a CPU or disk device. This is not supported. Please remove the CPU or disk device from the device_map.)r   ImportErrorr   torchcudais_availableRuntimeErrorgetloggerwarning_once
isinstancedictlenvalues
ValueError)r   argsr   r   r   r   r   validate_environment+   s$   


$z$EetqHfQuantizer.validate_environmentdtypetorch.dtypereturnc                 C   s   |t jkr
td |S )NzLWe suggest you to set `dtype=torch.float16` for better efficiency with EETQ.)r   float16r   info)r   r(   r   r   r   update_dtypeB   s   

zEetqHfQuantizer.update_dtypemodelr   
param_namec                 K   s>   ddl m} t||\}}t||r| js|dkrdS dS dS )Nr   )
EetqLinearbiasFT)integrations.eetqr0   r   r!   pre_quantized)r   r.   r/   r   r0   moduletensor_namer   r   r   param_needs_quantizationG   s   
z(EetqHfQuantizer.param_needs_quantizationc                 K   s8   ddl m} | || jj|j| _||| j| jd}d S )Nr   )replace_with_eetq_linear)modules_to_not_convertr3   )integrationsr7   get_modules_to_not_convertr   r8   _keep_in_fp32_modulesr3   )r   r.   r   r7   r   r   r   $_process_model_before_weight_loadingS   s   

z4EetqHfQuantizer._process_model_before_weight_loadingc                 C      dS NTr   r   r   r   r   is_serializableb   s   zEetqHfQuantizer.is_serializablec                 C   r=   r>   r   r?   r   r   r   is_trainablee   s   zEetqHfQuantizer.is_trainablec                 C   s   ddl m} || S )Nr   )EetqQuantize)r2   rB   )r   rB   r   r   r   get_quantize_opsi   s   z EetqHfQuantizer.get_quantize_ops)r(   r)   r*   r)   )r.   r   )__name__
__module____qualname____doc__requires_calibrationr   r'   r-   strboolr6   r<   r@   propertyrA   rC   __classcell__r   r   r   r   r   !   s    

r   )typingr   baser   modeling_utilsr   utilsr   r   r	   r
   quantizers_utilsr   r   
get_loggerrD   r   r   r   r   r   r   <module>   s   
