o
    iAA                     @   sZ  U d dl Zd dlZd dlZd dlmZ d dlmZ d dlm	Z
 ddlmZmZ ddlmZ dd	lmZ eeZzQd d
lmZmZmZmZmZ d dlmZ d dlmZ zd dlmZ W n e yg   dZY nw z
d dlm!Z" dZ#W n e y}   dZ#Y nw ej$%dd& Z'dZ(e'ev Z)de*fddZde*fddZ!dedddidedddiej+edd diej+edd diej+ed!d"diej+ed#d"diej+edd did$ded%d&diej,ed'd(dej+ed'd(diej+ed)d(diej+ed'd(did*dej+ej-B ed+d,dd-iidej+ej-B ed+d.dd-iidej+ej-B ed+d/dd-iidej+ej-B ed+d0dd-iidej+ej-B ed+d1dd-iidej+ej-B ed+d2dd-iid3Z.e/e*e/ee*B ee/eef B f f e0d4< edurej+ed5d6d7iej+ed5d6d7id8e.d9< d:d; Z1d`d<d=Z2W n) e y   dZ(dZ)d>d Zd?d Z!G d@dA dAZdBdC ZdDdE ZdFd= Z2Y nw dGddHdIddHdIddHdJZ3e/e*e/e*e*f f e0dK< i Z4e/e*edB f e0dL< dMe*dB dNe5fdOdPZ6	d`dMe*dQedB dNedB fdRdSZ7e4fdTe*dUe/e*edB f fdVdWZ8dadTe*dXe*dB dYe9e*B dB dNefdZd[Zd\e:e eB fd]d^Z;g d_Z<dS )b    N)Callable)
ModuleType)version   )ENV_VARS_TRUE_VALUESlogging)is_kernels_available   )flash_attention_forward)DeviceLayerRepositoryModeregister_kernel_mappingreplace_kernel_forward_from_hub)
get_kernel)use_kernel_forward_from_hub)FuncRepository)use_kernel_func_from_hubTFUSE_HUB_KERNELSYES
layer_namec                 C   s$   t rt| S tdt  dd S )NFkernels hub usage is disabled through the environment USE_HUB_KERNELS=c                 S      | S N clsr   r   g/sda-disk/www/egybert/egybert_env/lib/python3.10/site-packages/transformers/integrations/hub_kernels.py<lambda>E       z-use_kernel_forward_from_hub.<locals>.<lambda>)_kernels_enabled$_kernels_use_kernel_forward_from_hubloggerwarning_once_TRANSFORMERS_USE_HUB_KERNELS)r   r   r   r   r   >   s   r   	func_namec                 C   s>   t rtrt| S tstd dd S tdt  dd S )Nzwuse_kernel_func_from_hub is not available in the installed kernels version. Please upgrade kernels to use this feature.r   c                 S   r   r   r   funcr   r   r   r   T   r   z*use_kernel_func_from_hub.<locals>.<lambda>)r    _has_use_kernel_func_from_hub!_kernels_use_kernel_func_from_hubr"   r#   r$   )r%   r   r   r   r   G   s   r   cudaz!kernels-community/deformable-detrMultiScaleDeformableAttention)repo_idr   zkernels-community/moeLlama4TextMoezkernels-community/liger_kernelsLigerRMSNormzkernels-community/rmsnormRMSNormzkernels-community/mlx_rmsnorm)r*   rocmxpumpsnpuzmedmekk/triton-llama-mlpTritonLlamaMLPzkernels-community/megablocksMegaBlocksMoeMLPzahadnagy/megablocks)r*   r0   r1   zkernels-community/activationFastGELU)r,   r   r   	QuickGELUNewGELUSiluGeluGeluTanh)r+   r-   r/   MLPr5   r6   r7   r8   SiLUGeLUr;   _KERNEL_MAPPINGzkernels-community/rotaryapply_rotary_transformers)r,   r%   )r1   r*   rotary_pos_embc                    s"    | v pt  fdd|  D S )Nc                 3   s$    | ]}t |tot| V  qd S r   )
isinstancedicthas_key).0vkeyr   r   	<genexpr>   s   " zhas_key.<locals>.<genexpr>)anyvalues)drH   r   rG   r   rD      s   "rD   c                 C   s4   | d u rt } t| drtddstdt|  d S )Nr1   z0.10.2)MIN_VERSIONzfkernels uses an incompatible version. Please install the latest version with `pip install -U kernels`.)r?   rD   r   ImportErrorr   )mappingr   r   r   $register_kernel_mapping_transformers   s   rP   c                  O      dd }|S )Nc                 S   r   r   r   r   r   r   r   	decorator      z.use_kernel_forward_from_hub.<locals>.decoratorr   argskwargsrR   r   r   r   r         c                  O   rQ   )Nc                 S   r   r   r   r&   r   r   r   rR      rS   z+use_kernel_func_from_hub.<locals>.decoratorr   rT   r   r   r   r      rW   c                   @   s   e Zd Zdd ZdS )r   c                 O      t d)NzNLayerRepository requires `kernels` to be installed. Run `pip install kernels`.RuntimeError)selfrU   rV   r   r   r   __init__     zLayerRepository.__init__N)__name__
__module____qualname__r\   r   r   r   r   r     s    r   c                  O   rX   )Nz^replace_kernel_forward_from_hub requires `kernels` to be installed. Run `pip install kernels`.rY   rU   rV   r   r   r   r        r   c                  O   rX   )NzVregister_kernel_mapping requires `kernels` to be installed. Run `pip install kernels`.rY   ra   r   r   r   r     r]   r   c                  O   rX   )Nzcregister_kernel_mapping_transformers requires `kernels` to be installed. Run `pip install kernels`.rY   ra   r   r   r   rP     rb   zkernels-community/causal-conv1d)r,   r   zkernels-community/mamba-ssm)zcausal-conv1dz	mamba-ssmzfalcon_mamba-ssm_HUB_KERNEL_MAPPING_KERNEL_MODULE_MAPPINGattn_implementationreturnc                 C   s   | duot d| duS )zJCheck whether `attn_implementation` matches a kernel pattern from the hub.Nz'^[^/:]+/[^/:]+(?:@[^/:]+)?(?::[^/:]+)?$)research)re   r   r   r   	is_kernel  s   ri   attention_wrapperc              
   C   s0  ddl m} ddlm} d| v r| dd n| }t|sdS ts%tdd|v r5|d\}}| }n|}d}| }|	d	\}}}| }|rO| nd}zt
||d
}	W n typ }
 ztd| d|
 dd}
~
ww t|	dr|du r|t}|}n	|durt|	|}|| | || |d  |	S )a<  
    Load and register the kernel associated to `attn_implementation`.

    Args:
        attn_implementation: A string, usually a kernel repo like "kernels-community/flash-mla".
        attn_wrapper: a callable for the wrapper around the attention implementation. In `transformers` we
            have a wrapper around the `flash_attn_var_len` call, and the same goes for `sdpa` and `eager`.
            They just prepare the arguments properly. This is mostly used for continious batching, where we
            want the `paged` wrapper, which calls the paged cache.
    r   )ALL_MASK_ATTENTION_FUNCTIONS)ALL_ATTENTION_FUNCTIONS|r	   Nz`kernels` is either not installed or uses an incompatible version. Please install the latest version with `pip install -U kernels`.:@)revisionz-An error occurred while trying to load from 'z': .flash_attn_varlen_funcflash_attention_2)masking_utilsrk   modeling_utilsrl   splitri   _kernels_availablerN   strip	partitionr   	Exception
ValueErrorhasattrr
   getattrregister)re   rj   rk   rl   actual_attn_namer,   kernel_name_revkernelekernel_functionr   r   r   load_and_register_attn_kernel%  sB   


r   r   rO   c                 C   s  | |v rt ||  tr||  S | tvr"td|  d d || < d S trjz&t|  d }t|  dd }t|  dd }t|||d}||| < W ||  S  tyZ   d || < Y ||  S  t	yi   d || < Y ||  S w dd l
}| dd	}d
| d}z|dt}	t|	|d }
W n ty   d }
Y nw t|
r|
 rz|| }||| < |W S  ty   d || < Y ||  S w d || < ||  S )NzKernel z! not found in _HUB_KERNEL_MAPPINGr,   rp   r   rp   r   r   -r   is_
_availablez..utils.import_utils)rB   r   rc   r"   r#   rw   getr   FileNotFoundErrorAssertionError	importlibreplaceimport_module__package__r}   rz   callable)r   rO   r,   rp   r   r   r   new_kernel_namer%   	utils_modis_kernel_availablemoduler   r   r   lazy_load_kernel_  sR   



r   rp   r   c                 C   sb   ddl m} d|| d}tr-tjd}t|tdkr&t| |||dS t| ||dS t	d	)
Nr   )__version__transformers)	frameworkr   r,   kernelsz0.10.4)rp   r   
user_agentr   zFkernels is not installed, please install it with `pip install kernels`)
 r   rw   r   metadatar   pkg_versionparseget_kernel_hubrN   )r   rp   r   r   r   kernels_versionr   r   r   r     s   r   module_namesc                    s    t  tr g  fdd}|S )a  
    This decorator attaches the target function as an attribute of the module.
    The function must already be decorated with @use_kernel_func_from_hub
    this decorator then wraps it as an nn.Module internally.
    When kernelize is later applied to the full model, the function can be accessed as a regular module attribute and kernelized just like any other layer.
    The kernelization is performed in place, modifying the module directly.
    c                    s   | j   fdd}|| _ | S )Nc                    s0   | g|R i |  D ]}t | d| qd S )N	rotary_fn)setattr)r[   rU   rV   fn)r   	orig_initr   r   new_init  s   z8use_kernelized_func.<locals>.decorator.<locals>.new_init)r\   )r   r   r   )r   r   rR     s   z&use_kernelized_func.<locals>.decorator)rB   r   )r   rR   r   r   r   use_kernelized_func  s   
r   )	r   r   r   r   rP   r   r   r   r   r   )NN)=importlib.metadatar   osrg   collections.abcr   typesr   	packagingr   r   utilsr   r   utils.import_utilsr   flash_attentionr
   
get_loggerr^   r"   r   r   r   r   r   r   r   r   r   r!   r   rN   r   r)   r(   environr   upperr$   rw   r    str	INFERENCETRAININGTORCH_COMPILEr?   rC   __annotations__rD   rP   rc   rd   boolri   r   r   intlistr   __all__r   r   r   r   <module>   s\  

	"


.


%	
":(/