o
    i                     @  s   d dl mZ d dlmZ d dlmZmZ ddlmZ ddl	m
Z
mZ eeZe r4d dlZd dlmZ 						ddddZG dd deZG dd deZdS )    )annotations)Any)is_torch_availablelogging   )ConversionOps)get_module_from_nameshould_convert_moduleNcuda:0Fmodeltorch.nn.Modulemodules_to_not_convertlist[str] | Nonequant_configdict | Nonecompute_dtypetorch.dtypedevicestrpre_quantizedboolreturnc              
   C  s   ddl m} |du rg }t|  D ]D\}}t|tjsqt||s#q|d\}	}
}|	r2| 	|	n| }||s:|j
nd|s@|jnd|sH|jdund|||dd}t||| q| S )a  
    Replace nn.Linear modules with empty SINQLinear modules.

    Args:
        model: The model to modify
        modules_to_not_convert: List of module names to skip
        quant_config: SINQ quantization config dict (None for pre-quantized models)
        compute_dtype: Computation dtype for the quantized layers
        device: Device string for the quantized layers
        pre_quantized: Whether loading a pre-quantized checkpoint

    Returns:
        The modified model with SINQLinear modules
    r   )
SINQLinearN.FT)in_featuresout_featuresbiasr   r   r   use_unpack_kernel)sinq.sinqlinear_hfr   listnamed_modules
isinstancennLinearr	   
rpartitionget_submoduler   r   r   setattr)r   r   r   r   r   r   r   	full_namemoduleparent_path_
child_nameparent
sinq_layer r.   `/sda-disk/www/egybert/egybert_env/lib/python3.10/site-packages/transformers/integrations/sinq.pyreplace_with_sinq_linear    s*   

r0   c                   @  s*   e Zd ZdZdd Z			ddddZdS )SinqQuantizea'  
    Param-level ConversionOp for SINQ (from FP weights).

    At load time, for each `Linear.weight` that should be quantized:
      - The SINQLinear module already exists (created in _process_model_before_weight_loading)
      - We just call quantize() on it with the loaded weight tensor
    c                 C  
   || _ d S Nhf_quantizerselfr5   r.   r.   r/   __init__\      
zSinqQuantize.__init__N
input_dictdict[str, Any]r   torch.nn.Module | Nonefull_layer_name
str | Noner   dict[str, torch.Tensor]c                 K  s^   t t| \}}t|tr|d n|}t||\}	}
|	| |d ur*|| d|	_i S )Nr   T)	nextiteritemsr!   r   r   quantizediscard_is_hf_initialized)r7   r:   r   r=   missing_keyskwargsr*   valuesweight_tensorr(   tensor_namer.   r.   r/   convert_   s   

zSinqQuantize.convert)NNNr:   r;   r   r<   r=   r>   r   r?   __name__
__module____qualname____doc__r8   rK   r.   r.   r.   r/   r1   S   s    r1   c                   @  s(   e Zd ZdZdd Z		ddddZdS )SinqDeserializea0  
    ConversionOp for loading *pre-quantized* SINQ checkpoints.

    Checkpoint layout (what `SINQLinear.state_dict` produces) is, per module:
        <prefix>.W_q
        <prefix>.bias
        <prefix>.meta

    WeightConverter in the quantizer is configured so that:
      - we group ".W_q", ".meta", ".bias" as input_dict
      - conceptually treat them as belonging to "<prefix>.weight"
      - and call this SinqDeserialize.convert to load the state into the existing SINQLinear.

    The returned dict is {} because we load directly into the module.
    c                 C  r2   r3   r4   r6   r.   r.   r/   r8      r9   zSinqDeserialize.__init__Nr:   r;   r   r<   r=   r>   r   r?   c                 K  s   t | D ]\}}t|t r|d ||< q|d}|d}|d}	|d u s-|d u rBtt| }t|t r>|d }||iS t||\}
}||d}|	d urV|	|d< |
| d|
_	i S )Nr   z.W_qz.metaz.bias)W_qmetar   T)
r   rB   r!   getr@   rA   rH   r   load_state_dictrE   )r7   r:   r   r=   rG   kvrS   rT   r   r(   r*   stater.   r.   r/   rK      s*   





zSinqDeserialize.convert)NNrL   rM   r.   r.   r.   r/   rR   v   s    rR   )NNNr
   F)r   r   r   r   r   r   r   r   r   r   r   r   r   r   )
__future__r   typingr   transformers.utilsr   r   core_model_loadingr   quantizers.quantizers_utilsr   r	   
get_loggerrN   loggertorchtorch.nnr"   r0   r1   rR   r.   r.   r.   r/   <module>   s"   
3#