o
    i                     @   s   d dl mZ ddlmZ ddlmZ ddlmZ er ddlm	Z	 ddl
mZmZmZmZmZ dd	lmZ e r;d d
lZeeZG dd deZd
S )    )TYPE_CHECKING   )tqdm   )HfQuantizer)get_module_from_name)PreTrainedModel)is_accelerate_availableis_flute_availableis_hadamard_availableis_torch_availablelogging)QuantizationConfigMixinNc                       s   e Zd ZdZdZdef fddZdd ZdddZ		dddZ	dddZ
ed
efddZdd Zddded
efddZdd Z  ZS )HiggsHfQuantizerz
    Quantizer of the HIGGS method. Enables the loading of prequantized models and in-flight quantization of full-precision models.
    Fquantization_configc                    s   t  j|fi | d S )N)super__init__)selfr   kwargs	__class__ i/sda-disk/www/egybert/egybert_env/lib/python3.10/site-packages/transformers/quantizers/quantizer_higgs.pyr   )   s   zHiggsHfQuantizer.__init__c                 K   s~   t j s	tdt stdt stdt std|d u r&tdt	|t
r;d| v s7d| v r=tdd S d S )	NzNHIGGS quantization is only supported on GPU. Please use a different quantizer.zHUsing `higgs` quantization requires Accelerate: `pip install accelerate`zLUsing `higgs` quantization requires FLUTE: `pip install flute-kernel>=0.3.0`zbUsing `higgs` quantization requires fast_hadamard_transform: `pip install fast_hadamard_transform`zwYou are attempting to load a HIGGS model without setting device_map. Please set device_map comprised of 'cuda' devices.cpudiskzYou are attempting to load a HIGGS model with a device_map that contains a CPU or disk device. This is not supported. Please remove the CPU or disk device from the device_map.)torchcudais_availableNotImplementedErrorr	   ImportErrorr
   r   
ValueError
isinstancedictvalues)r   
device_mapr   r   r   r   validate_environment,   s*   

z%HiggsHfQuantizer.validate_environmentdtypetorch.dtypereturnc                 C   s(   |t jkr|t jkrtd| d|S )NzInvalid `dtype` zS. HIGGS quantization only supports `dtype=torch.float16` or `dtype=torch.bfloat16`.)r   float16bfloat16r    )r   r&   r   r   r   update_dtypeG   s
   
zHiggsHfQuantizer.update_dtypemodelr   c                 K   s8   ddl m} | || jj|j| _||| j| jd d S )Nr   )replace_with_higgs_linear)r   modules_to_not_convert)integrationsr-   get_modules_to_not_convertr   r.   _keep_in_fp32_modules)r   r,   r   r-   r   r   r   $_process_model_before_weight_loadingq   s   
z5HiggsHfQuantizer._process_model_before_weight_loadingc           
         s   ddl m}m} ddlm} ddlm  i } fdd| D }t|	 dd	d
D ]@\}}	|	j
j|vr?||	j
jd||	j
j< ||	j
j |	_|| jj| |	_||	j
j|	jj|	jd\|	j
_|	_|	j | jj|< q*d S )Nr   )TuneMetaDatamaybe_tune_and_repack)make_workspace_streamkr   HiggsLinearc                    s    i | ]\}}t | r||qS r   )r!   ).0namemoduler6   r   r   
<dictcomp>   s     zHHiggsHfQuantizer._process_model_after_weight_loading.<locals>.<dictcomp>zRepacking HIGGS modulesF)descleave)device)weightscalesmetadata)
flute.tuner3   r4   flute.utilsr5   r/   r7   named_modulesr   itemsr?   r>   	workspace	from_dictr   tune_metadatadatar@   to_dict)
r   r,   r   r3   r4   r5   flute_workspacesflute_modulesr9   r:   r   r6   r   #_process_model_after_weight_loading   s"   z4HiggsHfQuantizer._process_model_after_weight_loadingc                 C      dS )NFr   r   r   r   r   is_trainable   s   zHiggsHfQuantizer.is_trainablec                 C   rN   )NTr   rO   r   r   r   is_serializable   s   z HiggsHfQuantizer.is_serializable
param_namec                 K   s4   ddl m} t||\}}t||r|dkrdS dS )Nr   r6   r?   TF)r/   r7   r   r!   )r   r,   rR   r   r7   r:   tensor_namer   r   r   param_needs_quantization   s
   z)HiggsHfQuantizer.param_needs_quantizationc                 C   s   ddl m} ||}|S )Nr   )dequantize_higgs)r/   rU   )r   r,   rU   r   r   r   _dequantize   s   zHiggsHfQuantizer._dequantize)r&   r'   r(   r'   )r,   r   )__name__
__module____qualname____doc__requires_calibrationr   r   r%   r+   r2   rM   propertyboolrP   rQ   strrT   rV   __classcell__r   r   r   r   r   "   s    
*


r   )typingr   utils.loggingr   baser   quantizers_utilsr   modeling_utilsr   utilsr	   r
   r   r   r   utils.quantization_configr   r   
get_loggerrW   loggerr   r   r   r   r   <module>   s   
