o
    iJ                     @   sh   d dl mZ ddlmZ erddlmZ ddlmZmZm	Z	 e r%d dl
Z
e	eZG dd	 d	eZdS )
    )TYPE_CHECKING   )HfQuantizer   )PreTrainedModel)is_accelerate_availableis_torch_availableloggingNc                       s   e Zd ZdZdZ fddZdd Z		dd	d
Zdee	e
e	B f dee	e
e	B f fddZdd ZedefddZedefddZdd Z  ZS )BitNetHfQuantizerz
    1.58-bit quantization from BitNet quantization method:
    Before loading: it converts the linear layers into BitLinear layers during loading.

    Check out the paper introducing this method: https://huggingface.co/papers/2402.17764
    Tc                    s   t  j|fi | d S )N)super__init__)selfquantization_configkwargs	__class__ j/sda-disk/www/egybert/egybert_env/lib/python3.10/site-packages/transformers/quantizers/quantizer_bitnet.pyr   *   s   zBitNetHfQuantizer.__init__c                 O   s   t  stdtj std d S |d}|d u r#td d S t|t	r>t
|dkr4d| v s:d| v r@tdd S d S )	NzOLoading a BitNet quantized model requires accelerate (`pip install accelerate`)zhYou don't have a GPU available to load the model, the inference will be slow because of weight unpacking
device_mapzYou have loaded a BitNet model on CPU and have a CUDA device available, make sure to set your model on a GPU device in order to run your model.r   cpudiskzYou are attempting to load a BitNet model with a device_map that contains a CPU or disk device.This is not supported. Please remove the CPU or disk device from the device_map.)r   ImportErrortorchcudais_availableloggerwarning_onceget
isinstancedictlenvalues
ValueError)r   argsr   r   r   r   r   validate_environment-   s&   


$z&BitNetHfQuantizer.validate_environmentmodelr   c                 K   s8   ddl m} | || jj|j| _||| j| jd}d S )Nr   )replace_with_bitnet_linear)modules_to_not_convertr   )integrationsr&   get_modules_to_not_convertr   r'   _keep_in_fp32_modules)r   r%   r   r&   r   r   r   $_process_model_before_weight_loadingD   s   
z6BitNetHfQuantizer._process_model_before_weight_loading
max_memoryreturnc                 C   s   dd |  D }|S )Nc                 S   s   i | ]	\}}||d  qS )g?r   ).0keyvalr   r   r   
<dictcomp>V   s    z7BitNetHfQuantizer.adjust_max_memory.<locals>.<dictcomp>)items)r   r,   r   r   r   adjust_max_memoryU   s   z#BitNetHfQuantizer.adjust_max_memoryc                 C   s   dS )NTr   r   r   r   r   is_serializableY   s   z!BitNetHfQuantizer.is_serializablec                 C      | j jdko| j jdkS )Nautobitlinearonliner   linear_classquantization_moder4   r   r   r   is_trainable\   s   
zBitNetHfQuantizer.is_trainablec                 C   r6   )zUFlag indicating whether the quantized model can carry out quantization aware trainingr7   r8   r9   r4   r   r   r   is_qat_trainablec   s   
z"BitNetHfQuantizer.is_qat_trainablec                 C   sN   ddl m} ddlm} | jjdkr%| jjdkr%|dgdg|| gdgS g S )Nr   )WeightConverter)BitNetDeserializer7   offlineweight)source_patternstarget_patterns
operations)core_model_loadingr>   integrations.bitnetr?   r   r:   r;   )r   r>   r?   r   r   r   get_weight_conversionsk   s   z(BitNetHfQuantizer.get_weight_conversions)r%   r   )__name__
__module____qualname____doc__requires_calibrationr   r$   r+   r   strintr3   r5   propertyboolr<   r=   rG   __classcell__r   r   r   r   r
       s    
*r
   )typingr   baser   modeling_utilsr   utilsr   r   r	   r   
get_loggerrH   r   r
   r   r   r   r   <module>   s   
