o
    灛iL3                     @  s  d dl mZ ddlmZ ddlmZ ddlmZ dd ZdaddZdaddZ	ej
edd Zej
eeddd Zej
eeddbddZej
edd Zedd Zedd Zedd Zedd Zed d! Zed"d# Zed$d% Zej
eejd&d'd(d)dcd,d-Zej
eejd.d/d0ddd1d2Zed3d4 Zed5d6 Zed7d8 Zed9d: Zej
eejd;d'd(d)dcd<d=Zej
eejd>d/d0ddd?d@ZedAdB Z ej
eedCdedDdCZ!edEdF Z"ej
ej#edGdfdHdIZ$ej
ee%dJdgdKdJZ&edLdM Z'ej
ee%dNdgdOdNZ(edhdQdRZ)edidUdVZ*ej
ed*ej+fdjdYdZZ,d[d\ Z-ej
edkd]d^Z.ed_d` Z/d*S )l    )annotations   )jit   )core)mathc                 C  s   t | tjr	| jS | S N)
isinstancer   	constexprvalue)o r   Z/sda-disk/www/egybert/egybert_env/lib/python3.10/site-packages/triton/language/standard.py_unwrap_if_constexpr
   s   r   icore.constexprc                 C  s4   d}| j }|dkr|dL }|d7 }|dks	t|S )Nr   r   r   r   r
   )r   log2nr   r   r   _log2   s   
r   c                 C  s$   | j }t||d @ dko|dkS )Nr   r   r   )r   r   r   r   r   _is_power_of_two   s   r   c                 C  s   | | d | S )z
    Computes the ceiling division of :code:`x` by :code:`div`

    :param x: the input number
    :type x: Block
    :param div: the divisor
    :param div: Block
    r   r   )xdivr   r   r   cdiv!   s   r   sigmoidc                 C  s   ddt |    S )Nr   )r   expr   r   r   r   r   /   s   softmaxFc                 C  s0   | t | d }t|}t|d}t|||S )Nr   )maxr   r   sumfdiv)r   ieee_roundingznumdenr   r   r   r   6   s   

c                 C  s   t j| | jgddS )zn
    Returns a contiguous flattened view of :code:`x`.

    :param x: the input tensor
    :type x: Block
    T)can_reorder)r   reshapenumelr   r   r   r   ravel@   s   	r(   c                 C  sT   | | | }|| }|| }|| }t || |}|||  }	|| | }
|	|
fS )a  
    Transforms indices of a row-major :code:`size_i * size_j` matrix into those
    of one where the indices are col-major for each group of :code:`size_g`
    rows.

    For example, for :code:`size_i = size_j = 4` and :code:`size_g = 2`, it will
    transform ::

        [[0 , 1 , 2 , 3 ],
         [4 , 5 , 6 , 7 ],
         [8 , 9 , 10, 11],
         [12, 13, 14, 15]]

    into ::

        [[0, 2,  4 , 6 ],
         [1, 3,  5 , 7 ],
         [8, 10, 12, 14],
         [9, 11, 13, 15]]
    r   minimum)r   jsize_isize_jsize_gijsize_gjgroup_idoff_inew_inew_jr   r   r   	swizzle2dL   s   r5   c                 C  s   t | d|S )a'  
    Returns a tensor filled with the scalar value 0 for the given :code:`shape` and :code:`dtype`.

    :param shape: Shape of the new array, e.g., (8, 16) or (8, )
    :type shape: tuple of ints
    :param dtype: Data-type of the new array, e.g., :code:`tl.float16`
    :type dtype: DType
    r   )r   full)shapedtyper   r   r   zeross   s   
r9   c                 C  s   t | j| jS )zS
    Creates a tensor of zeros with the same shape and type as a given tensor.
    )r9   r7   r8   )inputr   r   r   
zeros_like   s   r;   c           	      C  sJ   |r| |ko	||k }nd}| |kp|}t || |}t |||}||fS NFr   where)	value1index1value2index2tie_break_lefttiegtv_reti_retr   r   r   _argmax_combine      rH   c                 C     t | |||dS NTrH   r?   r@   rA   rB   r   r   r   _argmax_combine_tie_break_left      rN   c                 C  rJ   r<   rL   rM   r   r   r   _argmax_combine_tie_break_fast   rO   rP   c                 C     t | |S r   )r   maximumabr   r   r   _elementwise_max      rV   rR   return_indicesreturn_indices_tie_break_left)return_indices_argtie_break_argNTc                 C  s   t | } |r|rt j| |t|dS t j| |t|dS t | jjt dk rEt | j r6| 	t j
} n| j s?J d| 	t j} t j| |t|dS N	keep_dims    z"Expecting input to be integer type)r   _promote_bfloat16_to_float32_reduce_with_indicesrN   rP   r
   r8   primitive_bitwidthis_floatingtofloat32is_intint32reducerV   r:   axisrX   rY   r^   r   r   r   r      s   
r   zmaximum indexrC   )r[   c                 C     t | |d||d\}}|S NT)rX   rY   r^   )r   r:   rj   rC   r^   _retr   r   r   argmax      rp   c           	      C  sJ   |r| |ko	||k }nd}| |k p|}t || |}t |||}||fS r<   r=   )	r?   r@   rA   rB   rC   rD   lt	value_ret	index_retr   r   r   _argmin_combine   rI   ru   c                 C  rJ   rK   ru   rM   r   r   r   _argmin_combine_tie_break_left   rO   rw   c                 C  rJ   r<   rv   rM   r   r   r   _argmin_combine_tie_break_fast   rO   rx   c                 C  rQ   r   r)   rS   r   r   r   _elementwise_min   rW   ry   r*   c                 C  s   t | } |r|rt j| |t|dS t j| |t|dS t | jjdk rBt | j r3| 	t j
} n| j s<J d| 	t j} t j| |t|dS r\   )r   r`   ra   rw   rx   r
   r8   rb   rc   rd   re   rf   rg   rh   ry   ri   r   r   r   min   s   
rz   zminimum indexc                 C  rk   rl   )rz   rm   r   r   r   argmin   rq   r{   c                 C  s   | | S r   r   rS   r   r   r   _sum_combine      r|   r   c                 C  s   t | } t j| |t|dS )Nr]   )r   r`   rh   r|   )r:   rj   r^   r   r   r   r     s   
c                 C  s   | |A S r   r   rS   r   r   r   _xor_combine  r}   r~   zxor sumc                 C  s<   | j j}| stdtj| |d} tj| |t|||dS )Nz#xor_sum only supported for integers)_builder)r^   r   
_generator)typescalarrf   
ValueErrorr   r`   rh   r~   )r:   rj   r^   r   r   	scalar_tyr   r   r   xor_sum  s
   r   cumsumc                 C     t | } t | |t|S r   )r   r`   associative_scanr|   r:   rj   reverser   r   r   r   %     
c                 C  s   | | S r   r   rS   r   r   r   _prod_combine1  r}   r   cumprodc                 C  r   r   )r   r`   r   r   r   r   r   r   r   6  r   n_dimsc                 C  s,  | j |? }|d|  dd|| d  g}t| |}tddd d d d f }tt|d|  dd d d d d f |}tt|| dd d d d d f |}	t|| j}t|	| j}	tj| jj	dd}
|j
|
dd}|	j
|
dd}| j
|
dd}|t||	k|A ||A t|A }|j
| jddS )Nr   r   r   T)bitwidthsigned)bitcast)r'   r   r&   arangebroadcast_tor   r7   get_int_dtyper8   rb   rd   r>   r;   )r   flipr   r   n_outerr7   ymaskleftrightidtypeileftirightixro   r   r   r   _compare_and_swapB  s   
,("r   stageorderc                 C  s   | j |? }t||k |dkr6|d|d |   dd| g}tttddddddf || j}n|}t|D ]}t| ||||  |} q=| S )zb
    order_type 0 == ascending
    order_type 1 == descending
    order_type 2 == alternating
    r   r   r   N)	r'   r   static_assertr&   r   r   r7   static_ranger   )r   r   r   r   r   r7   r   r   r   r   r   _bitonic_mergeV  s   
.r   dim
descendingc                 C  sv   |d u rt | jd n|}t|t | jd kd t| j| }td|d D ]}t| |||k r4dn||} q)| S )Nr   z+only minor dimension is currently supportedr   )lenr7   r   r   r   r   r   )r   r   r   _dimr   r   r   r   r   sorto  s   r   c                 C  sF   t | } t |}| d u rt|d } | t|d ksJ dt| S )Nr   z2Currently only support flipping the last dimension)r   r   r   r
   )r   r7   r   r   r   _get_flip_dim  s   
r   c           	      C  s  t t| jt|| j  t t| j t| j}t| jt| jt|| j  }t | dg| }t ||}t 	dddddf dt 	dd k}t 
||D ]*}|}t 
d|d D ]}||krr||d krrt ||}q`t|| |d dd}qTt || j} | S )z
    Flips a tensor `x` along the dimension `dim`.

    :param x: the first input tensor
    :type x: Block
    :param dim: the dimension to flip along (currently only final dimension supported)
    :type dim: int
    r   r   Nr   Tr]   )r   r   r   r7   r   r'   r   r&   expand_dimsr   r   r   )	r   r   stepsstartr   r   r   flip2r+   r   r   r   r     s    
 (r   c                 C  sT   t | |}t|jtsJ t|jdkr|S t ||jdd d|jd  g S )z
    Interleaves the values of two tensors along their last dimension.

    The two tensors must have the same shape.

    Equivalent to `tl.join(a, b).reshape(a.shape[-1:] + [2 * a.shape[-1]])`
    r   Nr   )r   joinr	   r7   listr   r&   )rT   rU   cr   r   r   
interleave  s
   	&r   )r   r   )F)NFTF)TFr<   )NFNN)r   F)r   r   r   r   )r   r   r   r   r   r   )r   r   r   r   r   )0
__future__r   runtime.jitr    r   r   r   r   r   _tensor_member_fnr   _add_math_1arg_docstrr   r   r(   r5   r9   r;   rH   rN   rP   rV   _add_reduction_docstrr   rp   ru   rw   rx   ry   rz   r{   r|   r   r~   builtinr   _add_scan_docstrr   r   r   r   r   CONSTEXPR_0r   r   r   r   r   r   r   r   <module>   s    

	


&












	
		