o
    i3+                     @   s   d dl mZ ddlmZ ddlmZmZmZ ddlm	Z	 ddl
mZ er*ddlmZ e r1d d	lZe rCd d
lmZ edd Zee_eeZG dd de	Zd	S )    )TYPE_CHECKING   )prepare_for_hqq_linear)is_hqq_availableis_torch_availablelogging   )HfQuantizer)get_module_from_name)PreTrainedModelN)	HQQLinearc                 C   s   t jd| j| jdS )Nr   )dtypedevice)torchemptycompute_dtyper   self r   g/sda-disk/www/egybert/egybert_env/lib/python3.10/site-packages/transformers/quantizers/quantizer_hqq.pyweight$   s   r   c                       s|   e Zd ZdZdZ fddZdd Zddd	ed
efddZ	dd Z
		dddZdddZdd Zed
efddZ  ZS )HqqHfQuantizerz
    HQQ quantizer base HF class.
    nn.Linear modules are first tagged with quant_config in _process_model_before_weight_loading().
    Fc                    sH   t  stdt j|fi | d | _d| _td d  dh | _d S )NzA valid HQQ version (>=0.2.1) is not available. Please follow the instructions to install it: `https://github.com/mobiusml/hqq/`.Fbias)	r   ImportErrorsuper__init__r   using_multi_gpur   state_dict_keyshqq_keys)r   quantization_configkwargs	__class__r   r   r   5   s   zHqqHfQuantizer.__init__c                 O   s   | j d u rd|v r|d | _ n	tj| _ td |d}t|tr?d| v s.d| v r2t	dt
t| dk| _d S d S )Nr   zOSetting dtype to torch.float32 as the default value since it was not specified.
device_mapcpudiskzYou are attempting to use an HQQ model with a device_map that contains a CPU or disk device. This is not supported. Please remove the CPU or disk device from the device_map.r   )r   r   float32loggerinfoget
isinstancedictvalues
ValueErrorlensetr   )r   argsr    r#   r   r   r   validate_environment@   s   



z#HqqHfQuantizer.validate_environmentmodelr   
param_namereturnc                 K   s   t ||\}}t|tjjS N)r
   r*   r   nnLinear)r   r2   r3   r    module_r   r   r   param_needs_quantization   s   z'HqqHfQuantizer.param_needs_quantizationc                    s   dd   fdd_ S )Nc                 S   s4   t || j|   }| jd ur|| j7 }|S r5   )r   matmultor   
dequantizetr   )r   xoutr   r   r   forward_with_device   s   

zEHqqHfQuantizer._patch_layer_for_multigpu.<locals>.forward_with_devicec                    s
    | S r5   r   )r?   rA   	hqq_layerr   r   <lambda>   s   
 z:HqqHfQuantizer._patch_layer_for_multigpu.<locals>.<lambda>)forward)r   rC   r   rB   r   _patch_layer_for_multigpu   s   z(HqqHfQuantizer._patch_layer_for_multigpuc                 K   s   t || jd}d S )N)r   )r   r   r   r2   r    r   r   r   $_process_model_before_weight_loading   s   z3HqqHfQuantizer._process_model_before_weight_loadingc                 K   s   d|_ |  |_|S NT)is_hqq_quantizedis_serializableis_hqq_serializablerG   r   r   r   #_process_model_after_weight_loading   s   
z2HqqHfQuantizer._process_model_after_weight_loadingc                 C      dS rI   r   r   r   r   r   rK     s   zHqqHfQuantizer.is_serializablec                 C   rN   rI   r   r   r   r   r   is_trainable  s   zHqqHfQuantizer.is_trainable)r2   r   )__name__
__module____qualname____doc__requires_calibrationr   r1   strboolr:   rF   rH   rM   rK   propertyrO   __classcell__r   r   r!   r   r   -   s    PY


	r   )typingr   integrationsr   utilsr   r   r   baser	   quantizers_utilsr
   modeling_utilsr   r   hqq.core.quantizer   rW   r   
get_loggerrP   r'   r   r   r   r   r   <module>   s    

