o
    i                     @   s   d dl Z ddlmZmZ ddlmZ eeZe Z	de j
de jjde jfdd	Z	
				dde jjde j
de j
de j
de j
dB dededB dedB dedB dedB dee j
df fddZdS )    N   )_flash_attention_forward!flash_attn_supports_top_left_mask)loggingquerymodulereturnc                 C   sR   | j tjkr'tdrtdS t|jdr|jj S tdd | D j	j S dS )ziIf the query is in float32, return a target dtype compatible with flash attention. Return None otherwise.cuda_is_quantizedc                 s   s"    | ]}t |tjjr|V  qd S )N)
isinstancetorchnnLinear).0layer r   k/sda-disk/www/egybert/egybert_env/lib/python3.10/site-packages/transformers/integrations/flash_attention.py	<genexpr>   s     z#get_target_dtype.<locals>.<genexpr>N)
dtyper   float32is_autocast_enabledget_autocast_dtypehasattrconfignextmodulesweight)r   r   r   r   r   get_target_dtype   s   

r           keyvalueattention_maskdropoutscalingsliding_windowsoftcap	is_causalc
                 K   s   |
 ddrtd |jd }tdd |jD rtd|dd}|dd}|dd}t|| }|	d ur;|	n| j}	t	||||f||	||||t
|| jjt| d	rW| jnd d

|
}|d fS )Noutput_attentionsFzFlash Attention does not support `output_attentions=True`. Please set your attention to `eager` if you want any of these features.r   c                 s   s    | ]}|d kV  qdS )r   Nr   )r   dimr   r   r   r   /   s    z*flash_attention_forward.<locals>.<genexpr>zTensor query has shape  with a zero dimension.
FlashAttention does not support inputs with dim=0.
Please check your input shapes or use SDPA instead.   	layer_idx)
query_lengthr&   r"   softmax_scaler$   r%   use_top_left_masktarget_dtypeattn_implementationr*   )getloggerwarning_onceshapeany
ValueError	transposer   r&   r   _use_top_left_maskr   _attn_implementationr   r*   )r   r   r   r    r!   r"   r#   r$   r%   r&   kwargsseq_lenr.   attn_outputr   r   r   flash_attention_forward   sD   

r<   )r   NNNN)r   modeling_flash_attention_utilsr   r   utilsr   
get_logger__name__r1   r7   Tensorr   Moduler   r   floatintbooltupler<   r   r   r   r   <module>   sD    
	
