o
    iw                  	   @   s  d Z ddlmZ ddlmZ ddlmZmZmZm	Z	 e r'ddl
Z
ddlmZ e r8ddlmZ ddlmZmZ e rAdd	lmZ e	eZd!d
dZdededdfddZd"dedededefddZG dd de
jjZd#dee dB fddZd$dd Z dS )%zZHIGGS through FLUTE (Flexible Lookup Table Engine for LUT-quantized LLMs) integration file    )sqrt   )should_convert_module)is_flute_availableis_hadamard_availableis_torch_availableloggingN)prepare_data_transposed)TuneMetaDataqgemm_v2)hadamard_transformc           	      C   sn   dd t dt| j D }|D ]}| j| }|d | d | }|| }||d| d < qtj| |d|S )Nc                 S   s   g | ]}d qS r    ).0_r   r   a/sda-disk/www/egybert/egybert_env/lib/python3.10/site-packages/transformers/integrations/higgs.py
<listcomp>%   s    z pad_to_block.<locals>.<listcomp>r      constant)rangelenshapenn
functionalpad)	tensordimshad_block_sizevaluepad_dimsdimsizenext_multiple_of_1024deltar   r   r   pad_to_block$   s   
r%   pnreturnztorch.Tensorc                 C   s  | |fdkrt g ddgddgddgdd	gd
dgddgddgddgddgddgddgddgddgddgddgd d!gd"d#gd$d%gd&d'gd(d)gd*d+gd,d-gd.d/gd0d1gd2d3gd4d5gd6d7gd8d9gd:d;gd<d=gd>d?gd@dAgdBdCgdDdEgdFdGgdHdIgdJdKgdLdMgdNdOgdPdQgdRdSgdTdUgdVdWgdXdYgdZd[gd\d]gd^d_gd`dagdbdcgdddegdfdggdhdigdjdkgdldmgdndogdpdqgdrdsgdtdugdvdwgdxdygdzd{gd|d}gd~dgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgd dgddgddgddgdd	gd
dgddgddgddgddgddgddgddgddgddgddgd d!gd"d#gd$d%gd&d'gd(d)gd*d+gd,d-gd.d/gd0d1gd2d3gd4d5gd6d7gd8d9gd:d;gd<d=gd>d?gd@dAgdBdCgdDdEgdFdGgdHdIgdJdKgdLdMgdNdOgdPdQgdRdSgdTdUgdVdWgdXdYgdZd[gd\d]gd^d_gd`dagdbdcgdddegdfdggdhdigdjdkgdldmgdndogdpdqgdrdsgdtdugdvdwgdxdygdzd{gd|d}gd~dgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgdĐdgdƐdgdȐdgdʐdgd̐dgdΐdgdАdgdҐdgdԐdgd֐dgdؐdgdڐdgdܐdgdސdgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgd dgS | |fdkrt g ddgddgddgd	d
gddgddgddgddgddgddgddgddgddgddgdd gd!d"gd#d$gd%d&gd'd(gd)d*gd+d,gd-d.gd/d0gd1d2gd3d4gd5d6gd7d8gd9d:gd;d<gd=d>gd?d@gdAdBgdCdDgdEdFgdGdHgdIdJgdKdLgdMdNgdOdPgdQdRgdSdTgdUdVgdWdXgdYdZgd[d\gd]d^gd_d`gdadbgdcddgdedfgdgdhgdidjgdkdlgdmdngdodpgdqdrgdsdtgdudvgdwdxgdydzgd{d|gd}d~gddgddgS | |fdkrt ddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddggS | |fdkr5t dgdgdgdgdgdgdgdgdgdgdgdgdgdgdgdggS | |fdkrZt dgdgdgdgdgdgdgdggS | |fdkrst dgdgdgdggS td|  d| (  N)r      g   g    f?g    Bg   ]?g   _g   \?g   9g   @g   ?g   g   @y?g   Ђ
@g   @<?g   4@g   @V
@g   i@g   ?g   [g   U g    v?g   `~?g   ?g   F?g    g   #	@g   ??g    u?g    g   ?g   !	g   v-?g    οg    W?g   Dg   @?g   -?g   `g   `?g   g    Կg   iǿg   5?g   `Xg    #8g    g    Eg   1߿g   `c?g   g   g    g    ??g    *V?g   ?g   @?g    ?g   ^
g   @xg    n|g   @g   `pg    5g   `Ag   ?g   @UVĿg   yg   Sg    d?g    g   g   `KG?g   `?g   ?g   	?g   K@g   @?g   Y?g   sg   `|οg   @?g     o@g    Dg   @?g    2?g   ?g    g   R @g   `(?g    Er?g   @N0g   `'/?g   g   >?g   ?g   `Qcg   2g   :7g   dig    c?g   `ֿg   `ؿg   ?g    +@g   ?g   `(f?g   @8ig   =g   򩣿g   
g   +g   fg    g   Lg   x?g   `*(g    ?g    /տg   @/aſg   Eg   gg    ?g   Gg   {?g   h?g   _a?g   ?g   ?g   3@g   #?g   yg   ѱܿg    
@g   ׿g    d,g   g   &g   ٿg   `{X?g   Yb?g   @"g    يg   3g   T g   @@g   kg   g   `{ڿg    ?g   >g   	?g   g   `5g   ?g    ?g   5?g   ?g   Iؿg    A?g   ?g   ѿg   9?g   @@g   zg    j	g    g   @g   @N?g    }g   ?g    Zg   `@g   cg    ҿg   @=Dg   g    ?g   `g   b@g   {@g    h @g    g   ?g   ]ƿg   K?g   g   w?g   `?g    4?g    b?g    g   `ſg   p?g   @?g   @Q=s?g   @տg   g   @@g   1u?g   ͔ƿg   @?g   䉫?g   `@g   _g    @g    ,?g   `dg   `T?g   L?g   b?g   3?g   ȃg   xg    v?g   `?g   섥g   K3g   /ۿg   `Eg   @#?g   3?g   
g    c?g    1g    ?g   `%?g    %?g   @w@g   I?g   kg    g   @J߿g   ?g    ǿg   Fֿg   9?g   ` g   8?g   `@g    xg   @wg    x?g    g   @ҿg   ͆g    vg    ?g   n@g   `9ǿg   Y?g    g   @?g    [m?g    F?g   @?g   `Ьg   `Gg    c?g   @g   Pug   @?g   g   @Hg    4g   @ g   wg   #?g    d?g   Mg    g    ?g   ?g   @vPg   ſg   r?g   Lg    ?g   @?g    :ڿg    ?g   ?g   `
?g   @_ g   $Rg    Z?g   3g   `g    ?g   @Og   ew@g    Lg   V?g   ߩҿg    V?g   ?g   g   ?g   `;?g    Կg   `@g   -g    ?g   `g   ZY?g    g    	?g    8;?g    ?g   aοg   @Rh?g   `F
?g   A?g   @?g    g   `g    8?g   `>?g   ?g   @pg   i?g    ?g   `пg   @ @g   B?g    bg   @ @g    g   `\g   g   )W?g   `?g   `g   `@g    0$ӿg    >g    i?g   cg   ?g   H?g    T?g    ]g   .ܿg   yؿg   *g   `F?g   @g   `Is?g   _;?g   E?g    g   `<?g   `B?g   @ެ?g   `5g   ׿g   `Sg   +Dg   @4?g    T?g   W@g   ܨݿg   1?g   	g    xg   `?g    ?g   h?g   `g   Կg   g    ȿg   -g   `?g    ֭g   ?g   `?g   ?g   `$Zg   G)?g   ?g    3g   %g   `axg   ?g   @Ŀg    ?g   g   g   ڜ?g   @&C?g   `"?g   @Cg   nͿg   92ֿg   @g   g   `g   g   =?g   `L?g   g    X?g    Sg   @g   `T @g   @?g    ҿg    g   `g    X
ڿg    <?g   `gg   ؿg    yg   z?g   ĝ?g   + ?g   $u?g   mg   Pvg   ?g   lg   	ؿg   ?g   y@g   @xɿg    m?g   (g   1ٿg   @qg    g   ?g   2Ig   t?g     i?g    	g   @g   `s?g   `
@g    M.g    ?g    ?g   `4g   g   @j	g   @Cg   oEg   ?g    3@g   k?g   ?g    ?g   @߿g   ޟ?g   οg   og   7g   +g    ſg   pj?g   g    g   @g   (g    =g   0?g   `>޿g    .g   @\?g   `[?g    #?g    ?g   ?g   g   N?g   jg   =?g   R޿g   耎?g   `g   Lg    @g   =g   @g   @?g   R@g   rg   g   ?g   `v_?g   @T?g   ?g   @?g   8?g    {}g   i?g   Sg   @k?g   s?g    ?g   ?g    _g   @3@g   ?g   @ ?g   `Xrg   [g   g   ?g   `?g   @?g    5g   ?)r   @   g   g   x?g    g   k?g    g   l?g   y?g   6g   ?g   пg   `5?g   k@g   $g    z?g    1J@g   ?g   o?g   .g   hg   ӿg   _?g   ?g   $?g   g   6@g   @T?g   -?g   g    @g   g   ù?g   .?g   ?g   fg   ?g   ,'?g    ?g   ^@g   @Lg    >ۿg   4bg   ?g    Ng   ޿g    ?g   nZg   tg   i~@g    Jf?g   ^Gg   `mg   `6@g   /?g    ?g    Oܿg   ?g   g   gg    ^Dg   Fg   |g   1g   `8g   @*?g   ֿg   <Sg    ^g   @^?g    Wig    ?g    1?g   6?g   @J?g   @(p?g   @??g   Ơ?g    D:?g   g   `uڿg   nͿg    @g    [g    j?g   =?g     ?g    Z¿g   ?g   ?g   G?g   ܿg   `9?g   g   ?g   ?g   :eg    zg   kg    g   m?g    [rٿg   ߿g   ?g    y?g    ?g   ?g   `/g   (&g    C?g   @ݿg   g   `ٿg   @8lg   hg   9?g   ߫g   ?g   z|g    y吿g   `ݨg   @g   ` ?g   g    ;Sg   `?g   "տg   `1?g   "1?g   W?)r      g   
g   @:-g   g    l?g   g   `?g   ;?g   @wg   g   @Lug    C?g   F?g   @?g   @d?g   `F?g    :Ƣ?g   ?g   g   `(g   Eg   g   ¦?g   `?g   Oӿg   @?g   ?g   LSg   &,?g   ?g   Sg   @{Ŀg   ?)r   r+   g   Wg   X g   g   g    'g   +g   ؿg   ?og   ?o?g   ?g   +?g    '?g   ?g   ?g   X @g   W@)r      g   @/7g    g    21g    ?_Ͽg    ?_?g    21?g    ?g   @/7@)r      g   *g    Yܿg    Y?g   *?zUnsupported p=z, n=)torchr   NotImplementedError)r&   r'   r   r   r   get_higgs_grid/   s  	
 !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ 
  
  
  
  
  
  
  
  
	  

  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
  
   
!  
"  
#  
$  
%  
&  
'  
(  
)  
*  
+  
,  
-  
.  
/  
0  
1  
2  
3  
4  
5  
6  
7  
8  
9  
:  
;  
<  
=  
>  
?  
@  
A  
B  
C  
D  
E  
F  
G  
H  
I  
J  
K  
L  
M  
N  
O  
P  
Q  
R  
S  
T  
U  
V  
W  
X  
Y  
Z  
[  
\  
]  
^  
_  
`  
a  
b  
c  
d  
e  
f  
g  
h  
i  
j  
k  
l  
m  
n  
o  
p  
q  
r  
s  
t  
u  
v  
w  
x  
y  
z  
{  
|  
}  
~  
   
    
    








	























 
!
"
#
$
%
&
'
(
)
*
+
,
-
.
/
0
1
2
3
4
5
6
7
8
9
:
;
<
=
>
?
@D















"r0   r-   r)      bits
group_sizehadamard_sizec                 C   s  t | jdksJ dt|d||  | j}tjj|ddd }| j}| j}| jdtj	d} t
| dg|} | jd | }	| d|	|} tjj| dd}
t| d|
d d d d d f  } t
| dg|| jd |	d|} tj| jd d |tjd	}td| jd d
D ] }tjd| ||d
   |j | ddtj|||d
 < q~ ||jd d}|
t| }
t|tj|
||| dd|||||||dd	\} }
}}}| |
||jtjd|dS )Nr   z%Only 2D weights are supported for now)axisT)copydtyper   r   devicer8   r+   )r!   F)num_bitsr3   vector_sizer8   r:   check_correctnessr8   )weightscalestablestables2tune_metadata)r   r   r0   tor:   r.   linalgnormr8   float32r%   reshaper   emptyuint8r   argmaxTr   r	   repeat_interleaveviewfloat16)r?   r2   r&   r3   r4   gridgrid_norm_2r:   r8   multr@   codesirA   rB   rC   r   r   r   quantize_with_higgs  sF     >rU   c                       sZ   e Zd Z					ddedededejdB d	ejdB d
edef fddZdd Z  Z	S )HiggsLinearTNr)   r1   in_featuresout_featuresr;   r8   r:   r3   r4   c	           	         s  t    || _|| _|| _|| _|| _|| dksJ |dv s"J tjt	j
|| d |ft	j|ddd| _tjt	j
||| f||ddd| _tjt	j
d| f||ddd| _tjt	j
d| d| df||ddd| _|rtjt	j
|||ddd| _n| d	d  d | _d | _d S )
Nr   )r      r-   r+   )r8   r:   F)requires_gradr   r9   bias)super__init__rW   rX   r;   r3   r4   r   	Parameterr.   rI   int16r?   r@   rA   rB   r[   register_parameter	workspacerC   )	selfrW   rX   r;   r[   r8   r:   r3   r4   	__class__r   r   r]     s0   
"
zHiggsLinear.__init__c              
   C   sR   t |dg| j}| jd u rtdt|| j| j| j| jj	t
jd| j| j| jdS )Nr5   z,Workspace must be set before calling forwardr>   )r4   )r%   r4   ra   	Exceptionr   r?   r@   rA   rB   rN   r.   rG   rC   )rb   xr   r   r   forward  s   
zHiggsLinear.forward)TNNr)   r1   )
__name__
__module____qualname__intr.   r8   r:   r]   rg   __classcell__r   r   rc   r   rV     s,    	)rV   modules_to_not_convertc              
   C   s   d}|   D ]I\}}t||sqtd2 t|tjr@t|j|j	|j
du|j|j|jd}t||_|d | || d}W d   n1 sJw   Y  q|sWtd | S )a_  
    Public method that replaces the Linear layers of the given model with HIGGS quantized layers.

    Args:
        model (`torch.nn.Module`):
            The model to convert, can be any `torch.nn.Module` instance.
        modules_to_not_convert (`list[str]`, *optional*, defaults to `None`):
            A list of nn.Linear weights to not convert. If a parameter path is in the list (e.g. `lm_head.weight`), the corresponding module will not be
            converted.
        quantization_config (`HiggsConfig`):
            The quantization config object that contains the quantization parameters.
    FmetaN)r[   r;   r4   r3   TzYou are loading your model using eetq but no linear modules were found in your model. Please double check your model architecture, or submit an issue on github if you think this is a bug.)named_modulesr   r.   r:   
isinstancer   LinearrV   rW   rX   r[   r2   r4   r3   type
source_clsrequires_grad_set_submoduleloggerwarning)modelrm   quantization_confighas_been_replacedmodule_namemodule
new_moduler   r   r   replace_with_higgs_linear&  s4   


r~   c              	   C   s   t  i |  D ]Z\}}|du rg }|| t|trN|j}|j}t jj	|||j
du|jj|jjd| j|< |t j||jj|jjdj | j| j_tt| dkr^t||d}|d q	| W  d   S 1 spw   Y  dS )a  
    Dequantizes the HiggsLinear layers in the given model by replacing them with standard torch.nn.Linear layers.
    Args:
        model (torch.nn.Module): The model containing HiggsLinear layers to be dequantized.
        current_key_name (list, optional): A list to keep track of the current module names during recursion. Defaults to None.
    Returns:
        torch.nn.Module: The model with HiggsLinear layers replaced by torch.nn.Linear layers.
    N)r[   r:   r8   r9   r   )current_key_namer5   )r.   no_gradnamed_childrenappendrp   rV   rW   rX   r   rq   r[   r@   r:   r8   _moduleseyerL   
contiguousr?   datar   listchildrendequantize_higgspop)rx   r   namer|   rW   rX   r   r   r   r   r   Q  s8   



$r   r   )r-   r   r)   r1   )NN)N)!__doc__mathr   quantizers.quantizers_utilsr   utilsr   r   r   r   r.   torch.nnr   flute.integrations.higgsr	   
flute.tuner
   r   fast_hadamard_transformr   
get_loggerrh   rv   r%   rk   r0   rU   ModulerV   r   strr~   r   r   r   r   r   <module>   s,   

   3<+