o
    i                 !   @   s4  U d dl Z d dlZd dlZd dlZd dlZd dlmZmZ d dlm	Z	 d dl
mZ d dl mZmZ d dlmZmZ d dlmZmZmZmZ d dlZd dlZd dlmZ d dlmZ d dlm  mZ  d dlm!Z!m"Z"m#Z# d d	l$m%Z% d d
l&m'Z' d dlm(Z(m)Z)m*Z*m+Z+m,Z, d dl-m.Z.m/Z/m0Z0m1Z1 d dl2m3Z4 d dl5m6Z6 ej7j8Z8g Z9e:e; e<d< ej=j>j?Z?G dd deZ@		ddedejAdeBdeBfddZCeeCejAjDddZEeeCejAjDdZFeeCejAjDddZGeeCejAjHdZIde#deJde#fd d!ZKe%e?jLe1d"eFd#e#d$e#fd%d&ZLe%e?jMe1d"eFd#e#d$e#fd'd(ZMe%e?jNe1d"eFd#e#de#d)eOd*eOfd+d,ZNe%e?jPe1d"eFd-e#d.eOd/eOd0eOd1eBd2e#fd3d4ZPe%e?jQjRgd5d6 ZSe%e?jQj#gd7e#fd8d9ZTe%e?jUe1 eFd:e#de#fd;d<ZUe%e?jVe1d"eFd-e#d:e#fd=d>ZVe%e?jWe1d"d-e#d:e#d?eOd@eOfdAdBZWe%e?jXe1 eFd:e#de#fdCdDZXe%e?jYe1 eFd-e#d:e#de#fdEdFZYe%e?jZe1d"d-e#d:e#d*eOfdGdHZZe%e?j[e1d"eFd-e#d:e#dIeOdJeBfdKdLZ[e%e?j\e1d"eFddNe#d:e#dOe;fdPdQZ\e%e?j]eFd-e#dRe#fdSdTZ]e%e?j^e1 eFd:e#de#fdUdVZ^e%e?j_e1d"eFd-e#d:e#de#fdWdXZ_e%e?j`d:e#dYe#de#fdZd[Z`e%e?jad-e#d:e#dYe#debe#e#f fd\d]Zae%e?jce1 eFd-e#d:e#d^e#d_eOd`eOdaeBdJeBde#fdbdcZce%e?jde1d"eFd-e#d:e#dde#de#fdedfZddge#dheJfdidjZedkejffdldmZge%e?jhe1 eFe@jijjfd:e#dne#dheJde#fdodpZhe%e?jke1d"eFd-e#dRe#dne#dheJfdqdrZke%e?jlddsdtZme%e?jne1 eFe@jijjdufd:e#dne#dheJd)eOfdvdwZne%e?jojpeFd-e#d:e#dne#dheJd)eOf
dxdyZoe%e?jojqeFd-e#d:e#dne#dheJd)eOd"e#fdzd{Zre%e?jsjpeFd-e#d:e#dne#dheJd|eOf
d}d~Zse%e?jsjteFd-e#d:e#dne#dheJd|eOd"e#fddZud-e#d:e#dne#dYee# dheJdeJde#de#fddZve%e?jwe1d"eFd-e#d:e#deJde#fddZwe%e?jxe1d"d-e#d:e#dne#dYee# dheJdeJde#de#fddZxe%e?jye1d"d-e#d:e#dne#dYee# dheJdeJde#de#fddZye%e?jze1 eFde@jijjfd:e#dne#dYee# dheJde#f
ddZze%e?j{e1d"eFde@jijjfd-e#d:e#dne#dYee# dheJde#fddZ{e%e?j|e1 eFe@jijjfdRe#dne#dheJde#fddZ|e%e?j}e1d"eFe@jijjfd-e#d:e#dne#dheJde#f
ddZ}e%e?j~e1 ddRe#de#deOfddZ~e%e?je1 de#de#de#fddZe%e?je1 d-e#de:eJ deJdeJdeJdeJfddZe%e?jj#	 			dd:e#deJdeeJ deeJ deJf
ddZde#deJdeeJ deeJ debeJeJf f
ddZe%e?je1 	 			ddRe#de#deJdeeJ deeJ deJfddZe%e?je1 d-e#de:eJ deJdeJfddZe%e?je1 d-e#de:eJ deJdeJdeJf
ddZd-e#d"e#dejffddZe%e?je1d"eEd-e#de#deJdejffddZe%e?je1 eEd-e#de#deJdejffddZdd Ze%e?je1 dRe#de:eJ de:eJ de:eJ de:eJ de#fddZe%e?je1 eFdRe#de:eJ de:eJ de:eJ de:eJ de:eJ de#fddĄZe%e?je1 d-e#de#d/eOfddǄZe%e?je1 dNe#de:eJ deJdeJdeJde#fdd̄Ze%e?jjpeF	dd-e#d:e#deeO de#fddτZe%e?je?jjpe8je?jjpe8jdRe#deOdeeB fdd҄Ze%e?je1ddԃdRe#deOdeeB fddքZe%e?je1 de#deJdeBfddلZe%e?je1ddڍde#deJdeBfdd܄Ze%e?je1 			ddYe#de#deJdeBdeBde#fddZe%e?je1 d-e#de#deJdeJdeBf
ddZde:eJ fddZde:e# deJdeJde:e# fddZde:e# fddZde:e# deJfddZde:e# deJdeJfddZe%e?jjpe?jjtg	dde:e# deJdeJdee# de#f
ddZe%e?jjpe?jjtg	 	dd:e#de:eJ deJdee:e#  dee:e#  f
ddZe%e?jj#ddRe#deJdeJdebe#df fddZe%e?jjp	 ddRe#de:eJ deJdebe#df fddZe%e?jj#dd:e#deJdeJdebe#df fdd Ze?jje8j	 dd:e#de#deJdebe#df fddZe%e?je1ddڍeFdd:e#de#de#d)eJd.eJf
ddZe%e?je1 eF			dd:e#de#de#d)eJd.eJdeBfd	d
Ze%e?je1ddڍeFdd:e#de#de#d)eJd.eJf
ddZe%e?jjpeFd-e#dRe#de#de#dee# deJdeJdeJdeJde:eB debee# ee# ee# f fddZe%e?jjtd-e#dRe#de#de#dee# deJdeJdeJdeJde:eB dej#dej#dej#debee# ee# ee# f fddZdee# dee# fddZe%e?jjpde#dRe#de:eJ de#de#dYee# dee# de:eB debee# ee# ee# f fd d!Ze%e?jjtde#dRe#de:eJ de#de#dYee# dee# de:eB dej#dej#dej#debee# ee# ee# f fd"d#Ze%e?jjpdRe#de:eJ dYee# deeO debe#e#f f
d$d%Ze%e?jjpde#dRe#de:eJ de#dYee# de:eB debee# ee# f fd&d'ZdRe#dYee# dee# d(ee# d)ee# daeBd*eOdeOd+eBdebe#e#e#ee# ee# f fd,d-Ze%e?je1dd.d/dRe#dYee# dee# d(ee# d)ee# daeBd*eOdeOdebe#e#e#f fd0d1Ze?jjpe8je?jjpe8jdRe#dYee# dee# d(ee# d)ee# daeBd*eOdeOdebe#e#e#f fd2d3Ze?jjpe8jdde:e# fd4d5Ze%e?jjpdRe#dYee# dee# d(e#d)e#d*eOdeOdebe#e#e#f fd6d7Ze%e?jjpdRe#dYee# dee# d(e#d)e#daeBd*eOdeOdebe#e#e#f fd8d9Ze%e?jjdRe#dYee# dee# daeBd*eOdeOdebe#e#e#f fd:d;Ze%e?jjpdRe#dYee# dee# d(e#d)e#daeBd*eOdeOdebe#e#e#e#e#f fd<d=ZdRe#dYee# dee# d(e#d)e#deOdaeBde#fd>d?Ze%e?jjpdRe#dYee# dee# d(e#d)e#d*eOdeOdebe#e#e#e#f fd@dAZe%e?jjpdRe#dYee# dee# d(e#d)e#d*eOdeOdebe#e#e#e#e#e#f fdBdCZe%e?jjpdRe#dYee# dee# d(e#d)e#d*eOdeOdebe#e#e#e#f fdDdEZe%e?je1ddԃeFddFdGZe%e?je1 dddddddHdee#e)f dkeejf dIeej dJeBdKeBdLeej fdMdNZe%e?je?je?jge1 dOdP Ze?jjpe8je%e?jǃe1ddԐddQdRe#dYe#dee# d(ee# d)ee# daeBdReOdSeOfdTdUZǐdVdW Ze%e?jjpde#dRe#dYee# d(ee# d)ee# d.ee# d/ee# deBdeOde:eB dXe#debe#ee# ee# f fdYdZZe%e?jjpde#dRe#dYee# d(ee# d)ee# d.ee# d/ee# deBdeOde:eB debe#ee# ee# f fd[d\Ze%e?jjtde#dRe#dYee# d(ee# d)ee# d.ee# d/ee# deBdeOde:eB dej#dej#dej#debe#ee# ee# f fd]d^Ze%e?j̃e1ddԐddRe#d-e#dYe#d(ee# d)ee# d.ee# d_ee# dSeOfd`daZe%e?j̓e1ddԐddRe#d-e#dYe#d(ee# d)ee# d.ee# d_ee# dSeOdbe#fdcddZe%e?j΃e1 eFdRe#debeJeJf fdedfZd:e+de+de:eJ deJfdgdhZe%e?jуe1 d:e+de+de:eJ fdidjZe%e?j҃e1 dRe+de+de:eJ de:eJ de:eJ f
dkdlZe%e?jӃddmde+deJde+dne+d.e)f
dodpZe%e?jԃe1 ddmde+deJde+dne+d.e)f
dqdrZddmde+deJde+dne+dseBd.e)fdtduZe%e?jjpe?jjpe8jddwdxZe%e?j׃de+deJde+dne+fdydzZe%e?j؃e1 de+deJde+dne+fd{d|Zde+deJde+dne+dseBf
d}d~Ze%e?jڃe1dddeFd:e#debe#e#f fddZe%e?jۃe1 	v	u	dde#deeBeJeOf deeBeJeOf deej fddZe%e?j݃dddZݐdd Zސdd Ze%e?jje%e?jje%e?jje?jj᠑e8je?jj᠑e8je?jj᠑e8je?jj᠑e8je?jj᠑e8je?jj᠑e8jdRe#dee:eJ  dee:eO  de#fddZe%e?jje%e?jje%e?jje?jj᠑e8je?jj᠑e8je?jj᠑e8je?jj᠑e8je?jj᠑e8je?jj᠑e8jdRe#dee:eJ  dee:eO  de#fddZdddZe%e?jjpe?jjtge?jjpe8je?jjpe8je1ddd	ddRe#de:eJ deeO de#fddZe%e?jjpe?jjtge?jjpe8je?jjpe8je1ddd	ddRe#de:eJ deeO de#fddZe%e?jjpe?jjtge?jjpe8je?jjpe8je1ddd		ddRe#de:eJ deeO deeO de#f
ddZe%e?jjpe?jjtge?jjpe8je?jjpe8je1ddd		ddRe#de:eJ deeO deeO de#f
ddZe%e?jjpe?jjtge?jjpe8je?jjpe8je1ddd			ddRe#de:eJ deeO deeO deeO de#fddZe%e?jjpe?jjtge?jjpe8je?jjpe8je1ddd			ddRe#de:eJ deeO deeO deeO de#fddZeF	ddRe#de:eJ de:eeO  deBde#f
ddZdd Zdd Zdd Zdd Z	dddZdd Zdd ZdddZdddZdd Ze%e?jje?jje8je?jje8jdd Ze%e?jje?jje8je?jje8jdd Ze%e?jje?jje8je?jje8jdd Ze%e?jje?jje8je?jje8jdd ZdÐdĄ ZddŐdƄZddǐdȄZ dɐdʄ Ze%e?jje?jje8je?jje8jdːd̄ Ze%e?jje?jje8je?jje8jd͐d΄ ZdϐdЄ Zdѐd҄ Ze%e?jje?jje8je?jje8jdӐdԄ Ze%e?jje?jje8je?jje8jdՐdք Z	e%e?j
je?j
j᠑e8je?j
j᠑e8jdאd؄ Ze%e?jje?jj᠑e8je?jj᠑e8jdِdڄ Ze%e?jje%e?jje?jj᠑e8je?jj᠑e8je?jj᠑e8je?jj᠑e8je?jj᠑e8je?jj᠑e8jdېd܄ Ze%e?jjpe?jjtge1 	ddRe#de:eJ deBdeeO de#f
dސd߄Ze%e?jjpe?jjtge?jjpe8je1 		ddRe#de:eJ deBdeeO deeO de#fddZe%e?jjpe?jjtge1 			ddRe#de:eJ deBdeeO deeO deeO de#fddZdddZdd Zdee# dee# de#de#fddZde,de#fddZeFdRe#de:eJ deBde:eeO  de#f
ddZe%e?jjpde#de#deBfddZe%e?je?jge1 dd Ze%e?jgdd Ze%e?jgdddZe%e?jgdd Ze%e?jgdd Zd:e#dne#dYee# dheJdeJdebe#e#f fddZe%e?je1ddd:e#dne#dYee# dheJdeJdebe#e#f fd dZe%e?j e1ddd:e#dne#dYee# dheJdeJdebe#e#f fddZ de#deOde#fddZ!de#deOde#fddZ"d	e#de,fd
dZ#de,de#de#fddZ$dee# de#fddZ%deJdeBdkejfdIejfddZ&de#deJdeJdeBfddZ'de#deJdeJdeJdeBf
ddZ(de#de:eJ deBfddZ)de#de:eJ deBfdd Z*e%e?j+e1 eFde#de:eJ deBfd!d"Z+	 	 		dde#d#e#d$eJd%eJdeBd&eBde#fd'd(Z,e%e?j-e1 eF	 	 	dde#d#e#d$eJd%eJdeBde#fd)d*Z-e%e?j.e1ddڍeFd+d, Z.e%e?j/e1 dde@jijjfd-d.Z/d/ej#d0ej#d1eBdeBfd2d3Z0e?j1jpe8je?j1jte8je1dd4dd5d6d7Z1e%e?j2jpe?j2jtge?j2jpe8je1 eF		ddRe#debeJeJf deBd8eeO d9eeO de#fd:d;Z3e%e?j2je?j2j᠑e8je?j2j᠑e8je1 eF	dde#deebeJeJf  deBdeebeOeOf  de#f
d<d=Z4e%e?j5e%e?j6e%e?j7eFe1 de#debeJdf de#fd>d?Z8e%e?j9e%e?j:e%e?j;eFe1 de#debeJdf de#fd@dAZ<de#debeJdf dBeeJeJeJge#f de#fdCdDZ=e%e?j>e%e?j?e%e?j@e1d"dEdF ZAe%e?jBe1dGdHdddIdJdKZBe%e?jCe1 dddLdMdNZCe%e?jDjpe?jDjtge1 dejEdddOde)dkeejf dPejFdIeej dJeBf
dQdRZGe%e?jDjHgdejEdddOde)de)dkeejf dPejFdIeej dJeBfdSdTZIe%e'dUdV ZJe%e?jKe?jKjpe8je1 ddde@jijjfdRe#dne#de)dWe)dYee# dheJde#fdXdYZKe%e?jLe?jLjpe8je1ddZdRe#dne#dheJdebe#e#f fd[d\ZLe%e?jMjp	v	dddd]d^e#d_e#d7e#d`eOdaeBdbee# d/eeO debe#e#f fdcddZNdedf ZOe%e?jPge1ddڍeFddgdhZPe%e?jQe1 didj ZQe%e?jRdkdl ZRe%e?jSjpe?jSjtgdddmd:e#dkeejf dee# de#fdndoZTe%e?jUjpe?jUjVgdd:e#deeJ fdpdqZWe%ej>j?jXddrdsZXe%e?jYe1 dddtdudvZYe%e?jZjpddwd:ej#deej dej#fdxdyZZddzd{d|Z[dddtd}d~Z\e%e?j]e1 dd Z]e%e?j^dddZ^eOe?j_e?j` eOe?jae?j eOe?jbe?j eOe?jce?jP eOe?jde?jQ eOe?jee?jf eOe?jge?jX eOe?jhe?ji eOe?jje?jU eOe?jke?jl eOe?jme?jn eOe?joe?jp eOe?jqe?jr eOe?jse?jt eOe?jue?jv eOe?jwe?jx eOe?jye?jz eOe?j{e?j| eOe?j}e?j~ eOe?je?j eOe?je?j eOe?je?j eOe?je?j eOe?je?j eOe?je?j^ dS (      N)CallableIterable)nullcontext)Enum)partialreduce)chainproduct)AnycastOptionalUnion)	sym_floatsym_intTensorregister_decomposition)	out_dtype)IntLike
NumberTypesuggest_memory_format
TensorLikeTensorSequenceType)_maybe_convert_to_dtype_maybe_resize_out_safe_copy_outout_wrapper)_pytree)tree_map__all__c                   @   s   e Zd ZdZdZdZdS )	Reductionr         N)__name__
__module____qualname__NONEMEANSUM r)   r)   ^/sda-disk/www/egybert/egybert_env/lib/python3.10/site-packages/torch/_decomp/decompositions.pyr    1   s    r    Fftype_promotioncompute_dtype_onlyinclude_non_tensor_argsc                    s    t  fdd}|S )Nc                     s   rt tjjfnt f  fddtj| i |D }tj|di\fdd}fdd}t|| i t||}rA|S t||S )Nc                    s   g | ]	}t | r|qS r)   )
isinstance.0x)allowed_typesr)   r*   
<listcomp>E   s    z-type_casts.<locals>.inner.<locals>.<listcomp>type_promotion_kindc                       t | tr
|  S | S Nr/   r   tor2   computation_dtyper)   r*   increase_precO      

z0type_casts.<locals>.inner.<locals>.increase_precc                    r6   r7   r8   r:   )result_dtyper)   r*   decrease_precU   r>   z0type_casts.<locals>.inner.<locals>.decrease_prec)	r   torchtypes_Numberpytreearg_tree_leavesutilselementwise_dtypesr   )argskwargs	flat_argsr=   r@   rr-   r+   r.   r,   )r3   r<   r?   r*   inner@   s    


ztype_casts.<locals>.inner)	functoolswraps)r+   r,   r-   r.   rM   r)   rL   r*   
type_casts:   s    rP   T)r,   r-   )r,   )r,   r.   r2   dimreturnc                 C   s$   t ||   D ]}| d} q| S N)rangerQ   	unsqueeze)r2   rQ   _r)   r)   r*   _unsqueeze_to_dimw   s   rX   
grad_inputout_gradyc                 C   s   | d||     S Nr!   conj_physicalrZ   r[   r)   r)   r*   tanh_backward}      r`   c                 C   s   | |d|     S r\   r]   r_   r)   r)   r*   sigmoid_backward   ra   rb   beta	thresholdc                 C   s.   ||   }t|| |k| | | |d  S N      ?)exprA   where)rZ   r2   rc   rd   zr)   r)   r*   softplus_backward   s   "rj   grad_outputalphascaleinput_scale	is_resultself_or_resultc           	      C   sb   || }|}|}|rt |dk| | ||  | | S t |dk| | | t ||  | | S Nr   )rA   rh   rg   )	rk   rl   rm   rn   ro   rp   negcoefposcoef
negiptcoefr)   r)   r*   elu_backward   s   ru   c                 C      t | |S r7   )rA   	full_likeselfvaluer)   r)   r*   fill_scalar      r{   rz   c                    s(   t   dk fdd t|  S )Nr   c                      s   d    dS )Nz@fill only supports 0-dimension value tensor but got tensor with z dimensionsrQ   r)   rz   r)   r*   <lambda>       zfill_tensor.<locals>.<lambda>)rA   _checkrQ   atencopyrx   r)   r~   r*   fill_tensor   s
   

r   ry   c                 C   s    t jt j| d ddddd S N   r   min   maxrA   clampry   r)   r)   r*   hardsigmoid   s    r   c                 C   s   t |dk|dk @ | d dS )Ng      g      @gUUUUUU?        rA   rh   rk   ry   r)   r)   r*   hardsigmoid_backward   s
   r   min_valmax_valc                 C   s   t ||k||kB d| S )Nr   r   )rk   ry   r   r   r)   r)   r*   hardtanh_backward   s   r   c                 C   s$   | t jt j| d dddd d S r   r   r   r)   r)   r*   	hardswish   s   $r   c              
   C   s,   t |dkdt |dk | |d d  | S )Nr   r         ?r   r   r)   r)   r*   hardswish_backward   s
   r   c                 C   s   t ||kd| S rq   r   )rk   ry   rd   r)   r)   r*   threshold_backward      r   negative_slopeself_is_resultc                 C   s   t |dk| | | S rq   r   )rk   ry   r   r   r)   r)   r*   leaky_relu_backward   s   r   nonegradapproximatec                 C   s   d}d}d}|dkrO|| d }d}|| }|| }	||||	   }
t |
}d| }d| }d| }d||  }|dd| |   }|| | }| ||  S |}|| d }ddt ||   }|t || d	  }| |||   S )
Ng;f?g;f?gmBP?tanhr   gHm?r!   r   g      )rA   r   erfrg   )r   ry   r   M_SQRT2	M_SQRT1_2
M_2_SQRTPIkBetakKappax_sqx_cuberM   
tanh_innerleftrightleft_derivativetanh_derivativeinner_derivativeright_derivativekAlphacdfpdfr)   r)   r*   gelu_backward   s,   
r   inputc                 C   s:   t t|}t |}|| d||   }| ||  S r\   )rA   r   Fsoftplussigmoid)rk   r   input_tanh_softplusinput_sigmoidoutr)   r)   r*   mish_backward  s   
r   c                 C   s   | t |  S r7   )rA   r   r   r)   r)   r*   silu"  s   r   c                 C   s,   ddt |   }| | d|d|    S r\   )rA   rg   )rk   ry   r   r)   r)   r*   silu_backward)  s   r   weightc                 C   s   t | dk| ||  S rq   r   )ry   r   r)   r)   r*   _prelu_kernel1  s   r   c                 C   s4   t |dk| ||  }t |dkd||  }||fS )Nr   r   r   )rk   ry   r   
input_gradweight_gradr)   r)   r*   _prelu_kernel_backward6  s   r   noiseloweruppertrainingc                 C   s6   |r|| dkr|  |S || d }t| |||S )Ngư>r"   )mulr   r   )rk   ry   r   r   r   r   r   r   r)   r)   r*   rrelu_with_noise_backwardA  s   
r   bufferc                 C   sN   |dk }t |dd}t |dd}t t | }| |||d|     S )Nr   r!   rT   )rA   rh   rg   abs)rk   ry   r   in_negative	max_derivsignri   r)   r)   r*   log_sigmoid_backwardV  s
   r   loss	reductionc                 C   s0   |t jjkrt| S |t jjkrt| S | S r7   )r    r'   rz   rA   meanr(   sum)r   r   r)   r)   r*   apply_loss_reductionc  s
   

r   dtypec                 C   s4   | t jkrt jS | t jkrt jS | t jkrt jS d S r7   )rA   	complex32float16	complex64float32
complex128float64r   r)   r)   r*   to_real_dtypel  s   


r   targetc                 C   s   | | d }t ||S )Nr"   )r   )ry   r   r   r   r)   r)   r*   mse_loss{  s   
r   c                 C   s,   |t jjkrd|  nd}|||  |  S )N       @)r    r'   rz   numel)rk   r   r   r   normr)   r)   r*   mse_loss_backward  s   r   c                 C   sF   t j| ||d}| td}t j||dd}t |}t |||S )N)rQ   r   z-infTrQ   keepdim)rA   softmaxeqfloatall
zeros_likerh   )ry   rQ   r   r   maskedmasked_rowszerosr)   r)   r*   safe_softmax  s
   
r   rf   c                 C   s<   | |   }t||k d|d  | |d|  }t||S )Nr   r"   )r   rA   rh   r   )ry   r   r   rc   r   r)   r)   r*   smooth_l1_loss  s   	&
r   c           	      C   sZ   |t jjkrd|  nd}|| }t|}||  }t||k || | |t| S re   )r    r'   rz   r   rA   r   rh   r   )	rk   ry   r   r   rc   r   r2   abs_x	norm_gradr)   r)   r*   smooth_l1_loss_backward  s   

r   c                 C   *   t | ||||}t||j t||ddS NT	copy_fromcopy_toexact_dtype)r   r   shaper   )rk   ry   r   r   rc   rY   resultr)   r)   r*   smooth_l1_loss_backward_out     
r   deltac              
   C   s`   |t jjkrd|  nd}|| }t|| k | |  | t||k||  | || |  S re   )r    r'   rz   r   rA   rh   )rk   ry   r   r   r   r   r2   r)   r)   r*   huber_loss_backward  s    r   c                 C   r   r   )r   r   r   r   )rk   ry   r   r   r   rY   r   r)   r)   r*   huber_loss_backward_out  r   r  ignore_indextotal_weightc                 C   s   |  dk rdnd}|tjjkr| | } ||}t||k|d}t|}	t|	||d}	|	  |     kr=dkrDn n| |} |d urcdd t	|  D }
|j
d |
|< ||
}| | } t||k| d} |	|  S )Nr"   r   r!   g      c                 S   s   g | ]}d qS r!   r)   r1   rW   r)   r)   r*   r4     r   z&_nll_loss_backward.<locals>.<listcomp>)rQ   r    r'   rz   rV   rA   rh   r   scatterrU   r   reshape)rk   ry   r   r   r   r  r  channel_dimsafe_targetrY   	new_shaper)   r)   r*   _nll_loss_backward  s    	

 

r  c           
      C   s   |  dks
J dt|  |}||}|d dks'J d| d| |d }||d|}||||}t|}d| | | |  }	||  }tj||	g|dS )Nr   z*glu does not support 0-dimensional tensorsr"   z.Halving dimension must be even, but dimension z	 is size rf   r}   )rQ   rF   canonicalize_dimsizenarrowrA   r   cat)
rk   ry   rQ   wrap_dimnIn	inputSize	firstHalf
secondHalfgradInputFirstHalfgradInputSecondHalfr)   r)   r*   glu_backward  s   

r  c                 C   sr  d|    krdksJ d J d|  dksJ d|  dko)|  dk}|sC|jd |jd ksCJ d|j d|j d| dksXJ d	|j d
|  df|d u si| |jd ksiJ d|tjjkr|  dkr|   dkr| jd |jd ksJ d|jd  d|    d| jd  n|   dkr|  dksJ d| j t| ||||||S )Nr   r"   input tensor should be 1D or 2Dr!   ;0D or 1D target tensor expected, multi-target not supportedsize mismatch (got input: 
, target: ):expected total_weight to be a single element tensor, got: z (z
 elements)rT   z<weight tensor should be defined either for all or no classesz7Expected a tensor of dimension 1 and tensor.size[0] == z but got: dimension z and tensor.size[0] == z7Expected a single element grad_output tensor, but got: )rQ   r   r   r    r&   rz   r  )rk   ry   r   r   r   r  r  no_batch_dimr)   r)   r*   nll_loss_backward  s:   ("
r  c                 C   s   |  dksJ d|   |  dksJ d|   |jd |jd kr<|jd |jd kr<|jd |jd ksHJ d|j d	|j | dks\J d
|j d|  dt| ||||||S )N   zSonly batches of spatial inputs supported (4D tensors), but got input of dimension: r   zUonly batches of spatial targets supported (3D tensors) but got targets of dimension: r   r"   r!   r  r  r  z ( z, elements))rQ   r   r   r  )rk   ry   r   r   r   r  r  r)   r)   r*   nll_loss2d_backwardF  s*   r!  c              	   C   s\   |d t t |  | dd |t t | | dd  }|d ur)|| }t||S )Nr!   r)   i)rA   maximumlog1pnew_fulllogr   )ry   r   r   r   r   r)   r)   r*   binary_cross_entropyi  s   

r&  c                 C   sR   d}| ||  t j|d|  |d }|d ur|| }|tjjkr'||  }|S )Ng-q=r!   r   )rA   r   r    r'   rz   r   )rk   ry   r   r   r   EPSILONr   r)   r)   r*   binary_cross_entropy_backward  s   
"r(  c                 C   s    t t |  | }t||S r7   )rA   r#  rg   r   )r   r   r   r   r)   r)   r*   soft_margin_loss  s   
r)  c                 C   s6   ||  t || d  }|tjjkr||  }|S r\   )rA   r   r    r'   rz   r   )rk   ry   r   r   rY   r)   r)   r*   soft_margin_loss_backward  s   	r*  r"   otherpc                 C   s   t j| | |dS )N)r,  )r   r   )r   r+  r,  r)   r)   r*   dist  r   r-  x1x2c           	      C   s   |  ddd}tj|tjd}| ddd}tj|tjd}t| d||gd}t|||gd}||j}|	d
 S )Nr"   rT   Tmemory_formatr   )powr   rA   	ones_likecontiguous_formatr  r   matmulmT	clamp_minsqrt)	r.  r/  x1_normx1_padx2_normx2_padx1_x2_r   r)   r)   r*   _euclidean_dist  s   r@  input_sizesstartendstepc                 C   s   |  |}t|| ||||S r7   )	new_zerosrA   slice_scatter)rk   rA  rQ   rB  rC  rD  rY   r)   r)   r*   slice_backward  s   

rG  r!   c                 C   s^  ddl m} |  }|dkrtdt|  |}t|  }t|  }|dkr.td|d ur4|nd}	|d ur<|nt	j
}
|	dk rI|	|| 7 }	|
dk rS|
|| 7 }
|	dk rZd}	n
|	|| krd|| }	||
t	j
krp|| }
n|
|	k rw|	}
n
|
|| kr|| }
|  |	||   }|
|	 }|| d | ||< ||  |9  < | jrtd| |||S )Nr   statically_known_truez,slice() cannot be applied to a 0-dim tensor.zslice step must be positiver!   z<Slice decomposition for quantized tensors aren't implemented)%torch.fx.experimental.symbolic_shapesrI  rQ   RuntimeErrorrF   r  listr  stridesysmaxsizestorage_offsetis_quantizedNotImplementedError
as_strided)ry   rQ   rB  rC  rD  rI  ndimsizesstrides	start_valend_valrP  lenr)   r)   r*   slice_forward  sD   	
rZ  c                    s@   | j |  dtf fdd}||d d}|||  }||fS )zn
    Normalize start and end such that both are in the range
    [0, x.get_size()[dim]] and start <= end.
    rR   c                    s,   | d u r|S | dk r|   } t t| ||S rq   r   r   )valr   r   defaultdim_sizer)   r*   
clamp_wrap  s
   z(_normalize_start_end.<locals>.clamp_wrapr   )r   int)r2   rQ   rB  rC  r`  r)   r^  r*   _normalize_start_end  s
   
rb  srcc              	   C   sB  t | j|}| j| }t| |||\}}t| j}|| |d  | ||< ||}|dkr;||kr;|dkr;| S d g|   }t	j
|| jd}	|	| | ||< t	j|| jt	jd}
|dkrht	|
|	|k}
||krtt	|
|	|k }
|dkrt	|
|	| | dk}
dg|   }d||< |
|}
t|
t||
|d| S )Nr!   r   devicere  r   rT   )rF   r  rT  r   rb  rL  expandclonerQ   rA   arangere  onesboollogical_andviewr   rh   _unsafe_masked_index)r   rc  rQ   rB  rC  rD  r_  src_sizeindicesidxmask
mask_shaper)   r)   r*   rF    s,   




rF  indexc                 C   s   |  |}t|| ||S r7   )rE  rA   select_scatter)rk   rA  rQ   rt  rY   r)   r)   r*   select_backwardE  s   
rv  offsetdim1dim2c                 C   s   |  |}t|| |||S r7   )rE  rA   diagonal_scatter)rk   rA  rw  rx  ry  rY   r)   r)   r*   diagonal_backwardL  s   
r{  input_dtypec                 C   s   | j |kr
||}|S r7   )r   r9   )rk   rY   r|  r)   r)   r*   _cast_grad_to_input_dtypeU  s   

r}  outputc                 C   s0   | | }||t j||dd  }t| || S NTr   )rA   r   r}  
contiguous)rk   r~  rQ   r|  new_grad_outputrY   r)   r)   r*   _softmax_backward_data]  s
   
r  c                 C   s*   | t |t j| |dd  }t| ||S r  )rA   rg   r   r}  )rk   r~  rQ   r|  rY   r)   r)   r*   _log_softmax_backward_datao  s   
r  c           
      C   sZ   | |d  ||d   }t tjtj|d}|d||d}|d|| |d}	||	 S )z/Utility function to implement im2col and col2imr"   r!   r   re  r   rT   )r   rA   ri  int64rV   )
input_dkernel_d
dilation_d	padding_dstride_dre  blocks_d	arange_kwblocks_d_indiceskernel_gridr)   r)   r*    _im2col_col2im_indices_along_dim{  s
   r  kernel_sizedilationpaddingrM  c              	      s&  t tdkdd  t t dkdd  t tdkdd  t tdkdd  ddd	}|d
 | d | ddd |d | jt}t |dv odtdd dd  D fdd tdd tdd   D t tdd D  fdd |dk}|s| d} | j\}}	}
}\}}\}} \}}\}}t|
||||| j	}t|||||| j	}t
| ||||f}|dd}|d d d d ||f }|dddddd}|d}|d}|||	| | || }|s|d}|S ) Nr"   c                   S      dS )Nz"im2col(): only 2D kernel supportedr)   r)   r)   r)   r*   r         zim2col.<locals>.<lambda>c                   S   r  )Nz$im2col(): only 2D dilation supportedr)   r)   r)   r)   r*   r     r  c                   S   r  )Nz#im2col(): only 2D padding supportedr)   r)   r)   r)   r*   r     r  c                   S   r  )Nz"im2col(): only 2D stride supportedr)   r)   r)   r)   r*   r     r  Tc                    B   |rt dd  D nt dd  D }t| fdd d S )Nc                 s       | ]}|d kV  qdS r   Nr)   r1   r,  r)   r)   r*   	<genexpr>      z1im2col.<locals>.check_positive.<locals>.<genexpr>c                 s       | ]}|d kV  qdS r  r)   r  r)   r)   r*   r    r  c                          d  S Nz& should be greater than zero, but got r)   r)   param
param_namer)   r*   r         z0im2col.<locals>.check_positive.<locals>.<lambda>r   rA   r   r  r  strictcondr)   r  r*   check_positive     (zim2col.<locals>.check_positiver  r  r  Fr  rM  r   r   c                 s       | ]}|d kV  qdS r  r)   r1   dr)   r)   r*   r    r  zim2col.<locals>.<genexpr>r   c                         dt   S )NzmExpected 3D or 4D (batch mode) tensor for input with possible 0 batch size and non-zero dimensions, but got: tupler)   r   r)   r*   r         c                 s   s>    | ]\}}}}}d |d|  ||d    d  |  V  qdS )r!   r"   Nr)   r1   r   paddilkerstr)   r)   r*   r    s
    "
r2  c                 s   r  r  r)   )r1   cr)   r)   r*   r    r  c                      s6   dt dd   d d  d d d dS )	Nz!Given an input with spatial size r2  , kernel_size=, dilation=
, padding=	, stride=z9, the calculated shape of the array of sliding blocks is z*, but its components must be at least one.r  r)   r  r  output_sizer  r   rM  r)   r*   r     s    r   r   rT   r!   r      T)rA   r   rY  r   r   r  ziprV   r  re  r   r  permuter  r  squeeze)r   r  r  r  rM  r  rT  batched_input	batch_dimr  input_hinput_wstride_hstride_w	padding_h	padding_w
dilation_h
dilation_wkernel_hkernel_wblocks_row_indicesblocks_col_indicespadded_inputr~  num_blocks_rownum_blocks_colr)   r  r*   im2col  sd   	



 




r  r  c              
      s  t tdkdd  t tdkdd  t tdkdd  t tdkdd  t tdkdd  d$d	d
}|d |d |ddd |d |d | jt}t |dv outdd dd  D fdd d d  }t d | dkfdd dd tD }	|	d |	d   t d  k fdd t  dk fdd |dk}
|
s| d} | j\}}\}}\}}\}}\}}| d d | g |	 } | dddd dd!} t	|||||| j
}t|d }t	|||||| j
}d"d tD }| d d t g| }d d ||f}tj||| dd#}t|| | | | f}|
sf|d}|S )%Nr"   c                   S   r  )Nzonly 2D output_size supportedr)   r)   r)   r)   r*   r     r  zcol2im.<locals>.<lambda>c                   S   r  )Nzonly 2D kernel supportedr)   r)   r)   r)   r*   r     r  c                   S   r  )Nzonly 2D dilation supportedr)   r)   r)   r)   r*   r     r  c                   S   r  )Nzonly 2D padding supportedr)   r)   r)   r)   r*   r     r  c                   S   r  )Nzonly 2D stride supportedr)   r)   r)   r)   r*   r     r  Tc                    r  )Nc                 s   r  r  r)   r  r)   r)   r*   r    r  z1col2im.<locals>.check_positive.<locals>.<genexpr>c                 s   r  r  r)   r  r)   r)   r*   r    r  c                      r  r  r)   r)   r  r)   r*   r     r  z0col2im.<locals>.check_positive.<locals>.<lambda>r  r  r)   r  r*   r    r  zcol2im.<locals>.check_positiver  r  r  Fr  rM  r  )r"   r   c                 s   r  r  r)   r  r)   r)   r*   r     r  zcol2im.<locals>.<genexpr>r2  c                      r  )NzmExpected 2D or 3D (batch mode) tensor for input with possible 0 batch size and non-zero dimensions, but got: r  r)   r  r)   r*   r     r  r   r!   c                      s   dd  d  S )Nz|Expected size of input's first non-batch dimension to be divisible by the product of kernel_size, but got input.shape[-2] = r2  z and kernel_size=r)   r)   )r  r   r)   r*   r     s
    c                 S   s:   g | ]\}}}}}d |d|  ||d    d  |  qS r!   r"   r)   r  r)   r)   r*   r4     s    "zcol2im.<locals>.<listcomp>rT   c                      4   d d d d d d  dd  d	S 
NzGiven output_size=r  r  r  r  z , expected input.size(-1) to be 	 but got rT   .r)   r)   Lr  r  r  r  r   rM  r)   r*   r         c                      r  r  r)   r)   r  r)   r*   r     r  r   r   r  c                 S   s   g | ]
\}}|d |  qS r"   r)   )r1   or,  r)   r)   r*   r4   6      
accumulater  )rA   r   rY  r   r   r  rV   r  r  r  re  rX   rE  prodr   _unsafe_index_putr   r  r  )r   r  r  r  r  rM  r  rT  prod_kernel_sizecolr  out_hout_wr  r  r  r  r  r  r  r  indices_rowindices_coloutput_padded_sizer~  rq  r)   r  r*   col2im  s   




 



"

r  rr  c                 C   s$   | | | |  jt| d}|S Nr0  )type_asrh  rF   r   )rk   rr  rm   rK   r)   r)   r*   native_dropout_backwardC  s   	r  
input_size	dimensionr  c           	      C   s   t |dkrt| dS tt ||}tj|| | jtjd}|d||	 }| 
d|d 	||d } | |}d| |f }tj||| dd S )Nr   rf  rT   r!   r7   Tr  )rY  rA   squeeze_copyrF   r  ri  re  int32unfoldflattenmovedimrE  r   r  r  )	r   r  r  r  rD  rQ   rq  rY   rt  r)   r)   r*   unfold_backwardR  s   
r  epsc              	   C   st   |d ur|}d| }t t ||k||k| |d|   dS t t |dk|dk| |d|   |dtdS )Nrf   r   r)   nan)rA   rh   rl  r$  r   )rk   ry   r  lohir)   r)   r*   logit_backwarde  s   r  trainc                 C   s&   |r|dkrt | ||d S |  S rq   )r   native_dropoutrh  )r   r,  r  r)   r)   r*   dropoutz  s   r  out0out1c                 C   s   |r6|dkr6|dkrt | t j| t jdfS | jjstdt | |k}||  tdd|   }||fS | t j| t jdfS )Nr   r!   r   z?result type Float can't be cast to the desired output type Longrf   )	rA   r   rk  r   is_floating_pointrK  	rand_liker   r4  )r   r,  r  	bool_maskresr)   r)   r*   r    s   r  half_to_floatc           	      C   s   ddl m} |  } |r| jtjksJ tj| tjj	d\}}| 
|} ||  dkr2t| }ntj| |dd}t| | }|tj||dd }|sR|
|}|S Nr   guard_or_falser5   T)r   )rJ  r  r  r   rA   halfrF   rG   ELEMENTWISE_TYPE_PROMOTION_KINDDEFAULTr9   r   rg   amaxr   )	r2   rQ   r  r  r<   r?   unnormalizedx_maxr   r)   r)   r*   _softmax  s    


r  )r   c           
      C   s   ddl m} |  } |r| jtjksJ tj| tjj	d\}}| 
|} ||  dkr/| }ntj| |dd}| | }ttjt||dd}|| }	|sT|	
|}	|	S r  )rJ  r  r  r   rA   r
  rF   rG   r  r  r9   r   r  r%  r   rg   )
r2   rQ   r  r  r<   r?   shiftedr  shifted_logsumexpr   r)   r)   r*   _log_softmax  s"   


r  rT   rp  padding_idxscale_grad_by_freqsparsec                 C   sJ   |   dks
J d|jdkr!| d|}|jdkr|d}|S | | S )Nr"   z'weight' must be 2-Dr!   r   )rQ   rT  index_selectr  )r   rp  r  r  r  r   r)   r)   r*   	embedding  s   	


r  num_weightsc                 C   s   t j| t jjd\}}| |} t|tj}|r8||f}t	|}t
j||g|dd}|| }	| |	d } t||k| j}
| |
d}| |f| j|jd   }t
j||g|dd|S )Nr	  Tr  rT   r   )rF   rG   r  r  r9   r   rA   longrE  r4  r   r  rV   rX   rT  masked_fillr   )rk   rp  r  r  r  r<   r?   countsrj  grad_weights_scalerr  r   grad_weightr)   r)   r*   embedding_dense_backward  s&   	


r  c                 C   s   d}| D ]}||9 }q|S r\   r)   )r2   rK   ir)   r)   r*   r    s   
r  tensors
num_chunksc           	      C   s   g }| D ]H}|  }|| | d | | }||| kr7dgd |j| d  d|||  g }t||d}|d | t|dg }||| q|S )Nr!   r   r"   rT   )r  rT  r   constant_pad_ndrA   Sizeappendr  )	r!  rQ   r"  padded_tensorstensortensor_sizepad_along_dimr  	view_sizer)   r)   r*   
_pad_chunk  s   
r+  c                 C   s(   | d j }| D ]
}|j |kr dS qdS )Nr   FTrT  )r!  rT  r'  r)   r)   r*   have_same_ndims  s   

r-  c                 C   sB   | d   d | }| D ]}t|  d | |kdd  qd S )Nr   c                   S   r  )NzG_chunk_cat expects same sizes of 0,...,dim-1 dimensions for all tensorsr)   r)   r)   r)   r*   r   %  r  z+leading_dimension_matches.<locals>.<lambda>)r  rA   r   )r!  rQ   leading_dim_sizesr'  r)   r)   r*   leading_dimension_matches   s   r/  c                 C   s   t |dkdd  t t| dkdd  | d j}| d j}| D ]$}t | dkdd  t |j|kdd  t |j|kdd  q"t| rVt| d 	 |}nt |dkd	d  | D ]}t ||j
k d
d  qbt| | |S )Nr!   c                   S   r  )Nz&_chunk_cat expects positive num_chunksr)   r)   r)   r)   r*   r   .  r  z._preprocess_chunk_cat_inputs.<locals>.<lambda>r   c                   S   r  )Nz0_chunk_cat expects a non-empty input tensor listr)   r)   r)   r)   r*   r   0  r  c                   S   r  )Nz#_chunk_cat expects non-empty tensorr)   r)   r)   r)   r*   r   5  r  c                   S   r  )Nz8_chunk_cat expects all input tensors with the same dtyper)   r)   r)   r)   r*   r   8  r  c                   S   r  )Nz8_chunk_cat expects all inputs tensors on the same devicer)   r)   r)   r)   r*   r   <  r  c                   S   r  )NzK_chunk_cat expects non-negative dim when input tensors have different ndimsr)   r)   r)   r)   r*   r   C  r  c                   S   r  )Nz3_chunk_cat expects dim < ndim for all input tensorsr)   r)   r)   r)   r*   r   H  r  )rA   r   rY  r   re  r   r-  rF   r  rQ   rT  r/  )r!  rQ   r"  expected_dtypeexpected_devicer'  r)   r)   r*   _preprocess_chunk_cat_inputs)  s:   


r2  r   c                 C   sH   t | ||}t| ||}|d u rt||d S tj||d |d |S )Nr!   )r   )r2  r+  rA   r  )r!  rQ   r"  r   r&  r)   r)   r*   
_chunk_catN  s   r3  split_sizesc                 C   sX   t j| ||d}|d u rdd |D S t||D ]\}}t||j t||dd qd S )Nr}   c                 S   s   g | ]	}|j tjd qS )r0  )rh  rA   r5  )r1   sr)   r)   r*   r4   j  s    z)split_with_sizes_copy.<locals>.<listcomp>Tr   )r   split_with_sizesr  r   r   r   )ry   r4  rQ   r   splitsr~  splitr)   r)   r*   split_with_sizes_copy_  s   	r9  
split_size.c                 C      t j| ||S r7   )r   r8  r   )r   r:  rQ   r)   r)   r*   unsafe_splitr     r<  c                 C   r;  r7   )r   r6  r]  )r   r4  rQ   r)   r)   r*   unsafe_split_with_sizesw  s   r>  c                    s   | j }|| } dkr|dksJ |  fS |  d   }ddlm} ||} fddt|D }  | |  |d< t| ||S )Nr   r!   )	guard_intc                       g | ]} qS r)   r)   r1   r   r:  r)   r*   r4     r   zsplit.<locals>.<listcomp>rT   )r   detachrJ  r?  rU   rA   r8  )ry   r:  rQ   rA  r_  chunksr?  r4  r)   rB  r*   r8  ~  s   
r8  tensor_indices_or_sectionsc                    s   |j jdksJ |jtjksJ |  t dkp dk fdd  dkr9| }t|t	s3J | 
||S t}tj  }rJ|j }rJ|j}|  dd |D }W d    n1 s_w   Y  | 
||S )Ncpur!   r   c                      s   d  dS )Nz{tensor_split expected tensor_indices_or_sections to be a zero-dimensional or one-dimensional tensor, but got a tensor with z dimsr)   r)   	split_dimr)   r*   r     s    zAtensor_split_tensor_indices_or_sections_py_impl.<locals>.<lambda>c                 S   s   g | ]}|  qS r)   )itemrA  r)   r)   r*   r4         zCtensor_split_tensor_indices_or_sections_py_impl.<locals>.<listcomp>)re  typer   rA   r  rQ   r   rI  r/   r   tensor_splitr   _guardsdetect_fake_mode	shape_envignore_fresh_unbacked_symbols)ry   rE  rQ   sectionsctx	fake_moderO  rp  r)   rG  r*   /tensor_split_tensor_indices_or_sections_py_impl  s(   
rT  mat1mat2c                 C   sH   |   s|  st|}t|}|t|| }|dkr|S |||   S rq   )r  
is_complexra  rA   mm)ry   rU  rV  rc   rl   r   r)   r)   r*   addmm  s   rY  use_geluc                 C   s<   t | ||||}|r| jrtj|ddS t|S t|S )Nr   )r   )rY  is_cudar   gelurelu)ry   rU  rV  rc   rl   rZ  r   r)   r)   r*   _addmm_activation  s   

r^  vecc                 C   s\   |   s|  st|}t|}|t|| }|dkr|S | dkr(||  S |||   S rq   )r  rW  ra  rA   mvr   )ry   rU  r_  rc   rl   r   r)   r)   r*   addmv  s   ra  r   rstdgammaNCHxWgroupoutput_maskc
              	      s  t j| ||dd t j|| dd t j|dd t|    k fdd tjfkfdd td u pJ  k fdd   }
t |
 k fdd t| | j	dgd	}|  j	dgd	}d }d }d }|	d
 r9d|
  }d urt|
d
|
	d}t|
d
|
	d}t|
dd|
}n&||
	d}||
	d}t|
dtjd|
f|jd}| | | | | | }|  || |  }|
d}t|d}t|d}t| |
|t||
| | }||j|j}|	d r^||
||

d  |
d j	d
gd	 }|	d rj|j	d
gd	}|||fS )NF)allow_cpu_scalar_tensorsc                      s   d    dS )NzExpect input to have z	 elementsr)   r)   )re  rf  rd  r)   r*   r   	  rJ  z,native_group_norm_backward.<locals>.<lambda>c                      s   d  d dj  S )NzExpect mean to have shape (, z
, but got r  r)   )rd  rg  r   r)   r*   r         c                      s$   d  dd ur   S d S )NzExpect gamma to have z elements but got rT   )r   r)   )re  rc  r)   r*   r        $ c                      s   d  d S )NzExpect number of channels z, to be evenly-divisible by number of groups r)   r)   )re  rg  r)   r*   r     r   r"   r}   r   rf   rT   r!   rd  r   )rF   check_same_devicecheck_same_shaperA   r   r   r   r   rm  r   rV   r  rj  re  rX   r9   r   )rk   r   r   rb  rc  rd  re  rf  rg  rh  cpgdsdbd_inputd_gammad_biasr5  ds_valdb_valc1c2c3r)   )re  rf  rd  rc  rg  r   r*   native_group_norm_backward  s   

 
""



$

rz  out2c
                C   d   t | |||||||||	
}|
||f}t|D ]\}}|d ur/t|| |j t||| dd q|S r   )rz  	enumerater   r   r   )rk   r   r   rb  rc  rd  re  rf  rg  rh  r  r   r{  r   rY   r   rK   r)   r)   r*   native_group_norm_backward_outM  s   
r~  c                 C   s   | d ur	|  |S | S r7   r9   )r2   r   r)   r)   r*   _maybe_castj  s   
r  grad_outnormalized_shapebiasc           "         s  |j }| }	t|j  fdd| |||fD \}
}}}|
d us$J |	t| }||d  }|d | }g }g }t|	D ]}||krJ|| q>|| q>t|}t|}ddl	m
} ||dksj||dkr|d rs||nd |d r|||d  nd |d r|||d  fS d fS t|| }t|| }|d usJ || | }|d ur|
| }n|
}|| }t||d}t||}t||d}t||}|| | }d }d } d }!|d r|| | }|d r|d urt|dkrt|
| |d} n|
| } |d r(|d ur(t|dkr$t|
|d}!n|
 }!t||jt| |d ur7|jnd t|!|d urE|jfS d fS )	Nc                 3   s,    | ]}|d ur|j  tjdn|V  qd S r  )r9   rA   r5  r0   r;   r)   r*   r    s    
z-native_layer_norm_backward.<locals>.<genexpr>r   rH  r!   r"   TF)r   rQ   rF   get_computation_dtyper   rY  rU   r%  r  rJ  rI  rE  rX   rA   r   r   rh  r  )"r  r   r  r   rb  r   r  rh  input_shape
input_ndimgrad_out_cast
input_castweight_cast	bias_castaxis
inner_dims
outer_dimsinner_dim_indicesouter_dim_indicesr   rd  MrI  x_hat
grad_x_hatabrw  rx  ry  rM   rr  d_weightrt  r)   r;   r*   native_layer_norm_backwardq  st   



r  c             	   C   s`   t | |||||||}||	|
f}t|D ]\}}|d ur-t|| |j t||| dd q|S r   )r  r}  r   r   r   )r  r   r  r   rb  r   r  rh  r  r   r{  r   rY   r   rK   r)   r)   r*   native_layer_norm_backward_out  s   
r  c                 C   s  g }t t|D ]}||  | d  qt| j}| |}|d u r=|tj	tj
fv r5ttj	j}n
ttjj}n|}ttjjjt|dj|dd|}	||	}
|d urb|
|}
| jpk|d uok|j}t| }|tjtjfv }|s|s|
 }
|	 }	|
| }||	fS )Nr!   r"   Tr   )rU   rY  r%  rQ   rF   r  r   r9   rA   r   r   finfor  r   rsqrtopsr   addScalarr3  r   r   	is_nestedr   channels_lastchannels_last_3dr  r  )r   r  r   r  dims_to_reducer   r<   upcasted_inputeps_valrqrst_inputupcasted_resultr  r1  is_channels_lastr   r)   r)   r*   _fused_rms_norm  s:   





r  c                 C   s  |j }| }t|j}| j|tjd}	|j|tjd}
|d ur)|j|tjdnd }|	d us1J |t| }||d  }|d | }g }g }t	|D ]}||krW|
| qK|
| qKt|}t|}ddlm} ||dksw||dkr|d r||nd |d r|||d  fS d fS t||
 }|d ur|	| }n|	}d }d }|
| }|d rtj|| |dd}||| |  | }|d r|d ur|	| }t|dkrtj||dd}n|}t||jt||jfS )Nr0  r   r  r!   Tr   F)r   rQ   rF   r  r   r9   rA   r5  rY  rU   r%  r  rJ  r  rE  rX   r   r  )r  r   r  rb  r   rh  r  r  r<   r  r  r  r  r  r  r  r  r   rd  r  r  r  rr  r  r  sum_vald_weight_full_shaper)   r)   r*   _fused_rms_norm_backward  sd   	


r  running_meanrunning_varmomentum
functionalc	                 C   sT  dgt td|   }	t| j}
|}|}|rt| j}
| j|
d}tj||	ddd\}}t	|| }| | | }t
||	}t
||	}|d ur]|| d| |  }|s]|| |d ur|  | jd  }t
||	}|||d   }|| d| |  }|s|| nT|d ur|d usJ |j|
dd}|}|j|
dd}|}|}dt||  }| jjdkr|}|}n
| d	}| d	}t||  d }t||  d }| | | }|d ur| }t||  d }|| }|d ur	| }t||  d }|| }| jjdkr|j| jd}|j| jd}|j| jd||||fS )
Nr   r"   r   T)rQ   
correctionr   r!   )r   r   rF  r   )rL  rU   rQ   rF   r  r   r9   rA   var_meanr  r  copy_r   r   r9  re  rK  rE  rX   r  )r   r   r  r  r  r   r  r  r  reduction_dimsr<   new_running_meannew_running_var	input_acc
biased_varr   rb  r~  	save_mean	save_rstdnsqueezed_varunbiased_varinvstdr)   r)   r*   native_batch_norm_helperb  st   





r  r  save_invstdc              
   C   ,   t | |||||||d	\}}	}
}}||	|
fS NFr  r   r   r  r  r  r   r  r  r~  r  r  rW   r)   r)   r*   native_batch_norm  s   
r  c              
   C   sv   |d u r|d u rt | |||||S |d u rtd|d u r"td|r0t | |||||||S t | ||||||S )Nz`running_mean is None, but running_var is provided. They should both be None or both be provided.z`running_var is None, but running_mean is provided. They should both be None or both be provided.)r   _native_batch_norm_legitrK  $_native_batch_norm_legit_no_training)r   r   r  r  r  r   r  r  r)   r)   r*   native_batch_norm_decomposition  s&   r  c                    s|   |  |}|| d |   dkr4|dkr4 fdd|D }  | |  ||d < tjjj| ||S tjjj|  |S )Nr!   r   c                    r@  r)   r)   r  rB  r)   r*   r4     r   z(unsafe_chunk_py_impl.<locals>.<listcomp>)r  rA   r  r   r>  r]  r<  r   )r'  rD  rQ   r_  r4  r)   rB  r*   unsafe_chunk_py_impl  s   
r  c              
   C   s   t j| ||||d||S r  )r   r  r]  )r   r   r  r  r  r  r  r)   r)   r*   r    s   
r  c              
   C   r  r  r  r  r)   r)   r*   r    s   
r  c           
   
   C   s,   t | ||d d |||d	\}}}}	}	|||fS r  r  )
r   r   r  r   r  r  r~  r  r  rW   r)   r)   r*   !_native_batch_norm_legit_no_stats(  s   	
r  c              
   C   sP   t | |||||||d	\}}	}
}}|d usJ d|d us!J d||	|
||fS )NT#new_running_mean should not be None"new_running_var should not be Noner  )r   r   r  r  r  r   r  r  r~  r  r  r  r  r)   r)   r*   #_native_batch_norm_legit_functional7  s   r  c           	   	   C   sP   t j| ||||d|}d}|t jjjkrt j| |}t j|t j| j| j	dS )a  
    Return a reserve tensor for batch norm, used only by cudnn to pass forward state to the
    backward pass. This is needed for `_batch_norm_with_update` and `_batch_norm_no_update`,
    which support a variety of backends including cudnn. We create this tensor here to get
    the correct shape in the traced graph if we detect that will call the cudnn kernel,
    and rely on DCE to avoid materializing this tensor.
    Tr   )r   layoutre  )
rA   _C_select_batch_norm_backend_BatchNormBackendCudnn(_get_cudnn_batch_norm_reserve_space_sizeemptyuint8r  re  )	r   r   r  r  r  r  r   backendreserve_sizer)   r)   r*   _get_batch_norm_reserve_tensorP  s   r  c              
   C   sD   t | ||||d||d	\}}}	}
}
t| |||||dd}|||	|fS )NTFr   r  r  r   r   r  r  r  r  r  r~  r  r  rW   reserver)   r)   r*   _batch_norm_with_updatem     
r  c              
   C   sh   t | ||||d||d	\}}}	}
}t| |||||dd}|
d us$J d|d us,J d|||	||
|fS )NTr  r  r  r  )r   r   r  r  r  r  r  r~  r  r  new_rmnew_rvr  r)   r)   r*   "_batch_norm_with_update_functional  s   r  c              
   C   sD   t | ||||d||d	\}}}	}
}
t| |||||dd}|||	|fS )NFr  r  r  r)   r)   r*   _batch_norm_no_update  r  r  c                 C   sB   |d u sJ t | |k jt jd}|| |  d|  }||fS )Nr   rf   )rA   r  r9   r  r  )r   r,  	generatorrr  r  r)   r)   r*   _fused_dropout_decomposition  s   r  )r   r  re  
pin_memorynon_blockingr1  re  r  r  r1  c          	      C   s
  |r|t jksJ d|rJ dt| t jttttfsJ |d u r6|d u r6|d u r6t| t jr4|  S | S d}t| t jrA| }nt 	| }|d uri||j
kri|d ura|jdkrat j||}d}t j|||}|d urx|sxt j||}d}|d urt j||dS |S )NTODOFrF  Tr0  )rA   stridedr/   r   ra  r   rk  complexrh  scalar_tensorre  rK  _primsconvert_element_type
device_put)	r2   r   r  re  r  r  r1  dtype_convertedx_tensorr)   r)   r*   _to_copy  s,   
r  c                 C   s
   t | S r7   )r   aliasr:   r)   r)   r*   nop_decomposition  s   
r  out3exponential_average_factorepsilonc              
   C   s^   t | |||||||\}}	}
|r||	|
| jdtjdfS ||d|d| jdtjdfS )Nr  r   )r   r  rE  rA   r  )r   r   r  r  r  r   r  r  r  r  r  r)   r)   r*   cudnn_batch_norm  s"   
r  c                 C   s@   t |D ]\}}|dkr|| jk r| j| |ks| |} q| S r\   )r}  rT  r   rV   )r2   broadcast_maskr  rr  r)   r)   r*   _broadcast_batch_norm_backward 	  s
    
r  r  c                 C   s   t | |||||||||	
S r7   )native_batch_norm_backward)r  r   r   r  r  r  r  r  r  rh  r  r)   r)   r*   batch_norm_backward'	  s   r  c
           &         s  |j }
|d ur|j }n|
}t|j   fdd| ||||||fD \}}}}}}}|j}| }|dks9J dd}tt|||  }|}|}|rV|d urS|d usUJ n|d ur^|d us`J |}t|| }dg| }|| ||< g }t	|D ]}||kr|
| qzt||}d| }t||}t|||  |}t|| |}tt|| || |} |d u rt||d }!nt|| |}!|r|| |  }"||" | |! }#n||! }#|	d r|| }$nd }$|	d r|}%nd }%|#|
t|$|t|%|fS )Nc                 3   s&    | ]}|d ur|  n|V  qd S r7   r  r0   r;   r)   r*   r  ^	  s
    
z-native_batch_norm_backward.<locals>.<genexpr>r"   z$rank of the input must be at least 2r!   rf   )r   rF   r  r   rQ   r  rL  rA   r  rU   r%  r  r   r   r9   r  )&r  r   r   r  r  r  r  r  r  rh  r|  weight_dtyper  r  r  running_mean_castrunning_var_castsave_mean_castsave_invstd_castr  
input_rankr  num_featuresr   r  r  reduction_axesr   r   grad_output_sumdot_p	grad_mean
proj_scale
grad_scaleprojrY   r  	grad_biasr)   r;   r*   r  C	  s   
	



r  c
                C   r|  r   )r  r}  r   r   r   )r  r   r   r  r  r  r  r  r  rh  r  r   r{  r   rY   r   rK   r)   r)   r*   native_batch_norm_backward_out	  s&   
r  save_varc                 C       t || |||||d|g d
S NT)TTTr   r  )r   rk   r   r  r  r  r	  r  r)   r)   r*   miopen_batch_norm_backward	  s   r  reserveSpacec	           	      C   r
  r  r  )	r   rk   r   r  r  r  r	  r  r  r)   r)   r*   cudnn_batch_norm_backward	  s   r  c                    s  | j  | jttdv fdd | jdd  D ]}t|dkfdd qd |d  dkrjd |d  dkrjtdd	 tdd  |D }td
d	 tdd  ||D }tjj	| ||S dd dd  fdd}|d |d \}}}}	|d |d \}
}}}| dt
|d|
f }|	s|stj|ddS dd }|||||	dd\}}|||||dd\}}d }tt|jd t|jd D ]\}}|d u r|d|d d |f }q||d|d d |f  }q|||  S )Nr  c                      
   d  S )Nz9adaptive_avg_pool2d(): Expected 3D or 4D tensor, but got r)   r)   r,  r)   r*   r   
     
 z%adaptive_avg_pool2d.<locals>.<lambda>r2  r   c                         dt   dS )Nzjadaptive_avg_pool2d(): Expected input to have non-zero size for non-batch dimensions, but input has shape r  r  r)   r  r)   r*   r   
  s    rT   c                 s   s    | ]	\}}|| V  qd S r7   r)   )r1   r   r  r)   r)   r*   r  
      z&adaptive_avg_pool2d.<locals>.<genexpr>c                 s   s&    | ]\}}}||d  |  V  qdS r!   Nr)   )r1   r   r  r5  r)   r)   r*   r  
  s    
c                 S   s   t j| | |ddS )Ntruncrounding_moderA   divr  r  r  r)   r)   r*   start_index"
  s   z(adaptive_avg_pool2d.<locals>.start_indexc                 S   s    t j| d | | d |ddS )Nr!   r  r  r  r  r)   r)   r*   	end_index%
      z&adaptive_avg_pool2d.<locals>.end_indexc                    s   t j| t jd}||| }| | d }| | }|dkp"|| dk }|r+|d7 }n|dkr3|d8 }t j| t jd}|d| }|rbt j| d |j|jd}	t ||	}||| }
|
| }n|}||||fS )Nrf  r!   r   rT   r  )rA   ri  r  rV   r  r   re  minimum)in_sizeout_sizeorangei0	maxlengthin_size_modadaptive	range_maxrq  maxvali1length)re  r  r  r)   r*   compute_idx(
  s(   

z(adaptive_avg_pool2d.<locals>.compute_idx.r   )r   rT   r}   c                 S   s`   t |tr	| |fS |dk sJ ||dk}|dkrt|d}t| |d} t|| }| |fS )Nr   rT   r2  r   r   )r/   r   rV   rX   rA   r  )valsr)  r&  r%  rQ   rr  r)   r)   r*   
maybe_maskP
  s   

z'adaptive_avg_pool2d.<locals>.maybe_mask)r%  rQ   r   )re  r   rY  rA   r   r  r  nnr  
avg_pool2drX   r   r	   rU   )r   r  r  rM  kernelr*  idxhlength_hrange_max_h
adaptive_hidxwlength_wrange_max_w
adaptive_wr+  r,  retr   jr)   )re  r  rT  r   r  r*   adaptive_avg_pool2d
  sN   

(  



&r:  c           	      C   s   t d| d ttj| jd |  }ttj|}dg| j }| jd |  |d | < |tj|| j	d
||  d}| t| jd |  t| }tj|d|g| ddd
|jS )Nmax_unpoolingd_forward_outr!   rd  rT   Fr  )rF   alert_not_deterministicr   operatorr   r   rT  r   ri  re  rm  r  rE  rL  r  )	ry   rp  r  rQ   nchwindices_nc_shapeindices_flatr~  r)   r)   r*   _max_unpoolndp
  s   	"rC  c                    s   t jt jkfdd t tdkfdd t jdv fdd t jjkfdd tdjD ] t  d	k fd
d q>t	dS )Nc                         d j  S )Nz2elements in indices should be type int64 but got: r   r)   )rp  r)   r*   r   
      zmax_unpool2d.<locals>.<lambda>r"   c                      r  )NzMThere should be exactly two elements (height, width) in output_size, but got 
 elements.rY  r)   r  r)   r*   r   
     r  c                         d j  dS )NzLInput to max_unpooling2d should be a 3d or 4d Tensor, but got a tensor with  dimensions.r,  r)   r   r)   r*   r   
  s   c                         dj  d j  S NzBExpected shape of indices to be same as that of the input tensor (z%) but got indices tensor with shape: r  r)   )rp  ry   r)   r*   r   
     
r!   r   c                         dj  d  dS )NzZmax_unpooling2d(): Expected input to have non-zero size for non-batch dimensions, but got  with dimension  being empty.r  r)   )r   ry   r)   r*   r   
  
   )
rA   r   r   r  rY  rT  r   rU   r  rC  )ry   rp  r  r)   )r   rp  r  ry   r*   max_unpool2d
  s,   





	rS  c                    s  t jt jkdd  t jdv fdd t tdkfdd t tdkfdd t tdkfdd t jjkfd	d td
jD ] t  dk fdd qXt d dko~d
 dko~d dkfdd t	dS )Nc                   S   r  )Nz(elements in indices should be type int64r)   r)   r)   r)   r*   r   
  r  zmax_unpool3d.<locals>.<lambda>r   r  c                      rJ  )NzLInput to max_unpooling3d should be a 4d or 5d Tensor, but got a tensor with rK  r,  r)   r   r)   r*   r   
  r  r   c                      r  )NzVThere should be exactly three elements (depth, height, width) in output_size, but got rF  rG  r)   rH  r)   r*   r   
  rI  c                      r  )NzRThere should be exactly three elements (depth, height, width) in stride, but got: rF  rG  r)   rM  r)   r*   r   
  r   c                      r  )NzSThere should be exactly three elements (depth, height, width) in padding, but got: rF  rG  r)   )r  r)   r*   r   
  r   c                      rL  rM  r  r)   )rp  r   r)   r*   r   
  rN  r!   r   c                      rO  )NzZmax_unpooling3d(): Expected input to have non-zero size for non-batch dimensions, but got rP  rQ  r  r)   )r   r   r)   r*   r   
  rR  r"   c                      r  )Nz5strides should be greater than zero, but got stride: r)   r)   rV  r)   r*   r   
  r  )
rA   r   r   r  rT  rY  r   rU   r  rC  )r   rp  r  rM  r  r)   )r   rp  r   r  r  rM  r*   max_unpool3d
  sB   	







	"
rW  )rl   r'  c                C      t | |||d|dS )NTinplacerl   
_index_addr2   rQ   rt  r'  rl   r)   r)   r*   
index_add_
  s   	r^  c                C   rX  )NFrY  r[  r]  r)   r)   r*   	index_add
  s   
r_  rZ  c                   s"  t | jtjdkfdd jdkrdnd|jdkr*|ndtkfdd  dkr]t | jttu pQt 	t
  fdd |  }| jdk}|ri| dn| }d f }|rwtjntj}	|	|||dd	}
|r| S |r|
dS |
 S )
Nr!   c                      rJ  Nz(Index should have dimension 1 or 0 (got r  r,  r)   rt  r)   r*   r     r  z_index_add.<locals>.<lambda>r   c                      s   d d d S )NzNumber of indices (z') should be equal to tensor.size(dim) (z), for dim=r)   r)   )rQ   
index_sizer(  r)   r*   r         c                      s   dt   d dS )Nzalpha argument of type z cannot be safely cast to type !)rK  r)   )rl   python_typer)   r*   r   "  rc  r7   Tr  )rF   canonicalize_dimsrT  rA   r   r  dtype_to_typer   rk  is_weakly_lesser_typerK  rV   r   
index_put_	index_putr  r  )r2   rQ   rt  r'  rZ  rl   zero_dimr.  rq  rj  r   r)   )rl   rQ   rt  rb  re  r(  r*   r\  	  s6   	

r\  r   c              
   C   s   t t| dkdd  t| }| d  }|dd  }tdd | D }|r,||f}n||f}|| }| d ||}dt| }	t|D ]+}
| |
 }t||	d||d f |}|rhtj	||d|
d}qFtj	||d|
d}qF|S )	Nr   c                   S   r  )Nz#received an empty list of sequencesr)   r)   r)   r)   r*   r   4  r  zpad_sequence.<locals>.<lambda>r!   c                 s   s    | ]}| d V  qdS r  r  r0   r)   r)   r*   r  8      zpad_sequence.<locals>.<genexpr>)r   r   rQ   rt  )
rA   r   rY  r  r   r$  rU   r   r#  ru  )	sequencesbatch_firstpadding_valuesequences_sizemax_sizetrailing_dimsmax_lenout_dimsr   dim_paddingsr   currseqrowr)   r)   r*   pad_sequence1  s(   
rz  c                 C      t | |||ddS )NTrZ  _index_copyr2   rQ   rt  r'  r)   r)   r*   index_copy_L     r  c                 C   r{  )NFr|  r}  r  r)   r)   r*   
index_copyQ  r   r  c          
         s   t | j|}t jdk fdd | jdk}|r | dn| } jdkr, dn  d|  f }|r:tjntj}||||}	|rG| S |rN|		dS |	
 S )Nr!   c                      rJ  r`  r,  r)   ra  r)   r*   r   ]  r  z_index_copy.<locals>.<lambda>r   r7   )rF   rf  rT  rA   r   rV   r   ri  rj  r  r  )
r2   rQ   rt  r'  rZ  rk  r.  rq  rj  r   r)   ra  r*   r~  W  s   

r~  c                 C   sR   t | d| }t t |  }| js| jr| d}n|}|t | |fS )Nr)   r  )rA   r  rE  rg   r   r[  is_xpur#  )ry   r   ri   r   r)   r)   r*   log_sigmoid_forwardm  s   r  lowhighr  c                 C   s$   t j| jt|t|| j| j|dS )N)r  r  r   re  r  )prims_uniform_helperr   r   r   re  )r2   r  r  r  r)   r)   r*   uniformz  s   r  c                 C   s   |  t| |||S r7   )r  r  )ry   r  r  r  r)   r)   r*   uniform_  s   r  c                 C   s   t | d }|d ur"t|d u dd  tt ||kdd  |S |d urjt|d u dd  tt ||kdd  g }t|D ]%\}}t||krZ|| |d  t|  qB|t| |d  |  qB|S tddd  d S )	Nr"   c                   S   r  Nz9Must specify exactly one of output_size and scale_factorsr)   r)   r)   r)   r*   r     r  z.upsample_compute_output_size.<locals>.<lambda>c                   S   r  N r)   r)   r)   r)   r*   r     r  c                   S   r  r  r)   r)   r)   r)   r*   r     r  c                   S   r  r  r)   r)   r)   r)   r*   r     r  Fc                   S   r  r  r)   r)   r)   r)   r*   r     r  )rY  rA   r   r}  ra  r%  r   )r  r  scale_factorsspatial_dimensionsr   r5  r)   r)   r*   upsample_compute_output_size  s.   r  c                 C   s   | d u rd S | | S r7   r)   )scalesrq  r)   r)   r*   get_scale_value  s   r  r  c                 C   s2   t |  ||}|r|nd gt| }t| ||S r7   r  r  rY  _upsample_nearestr   r  r  osizer  r)   r)   r*   _upsample_nearest_vec  s   r  c                 C   s6   t |  ||}|r|nd gt| }t| ||ddS NTexactr  r  r)   r)   r*   _upsample_nearest_exact_vec  s   r  c                 C   s   g }t |}|r
dnd}t|D ]I}|| }| j| |  }	|| d ur,|	|	||   n|	| }
tj|tj| jd}|| |
 tj}t|d | D ]}|	d}qL|
| q|S )Nr   r   r  r!   rT   )rY  rU   r   rA   ri  r   re  r9   r  rV   r%  )r   r  r  r  rp  num_spatial_dimsrw  r  r  isizerm   output_indicesinput_indicesrW   r)   r)   r*   !_compute_upsample_nearest_indices  s   $r  )preserve_memory_formatr   r  c                 C   s   t | ||gS r7   r  r   r  r  r)   r)   r*   upsample_nearest1d  s   	r  c                 C   s   t | ||gddS r  r  r  r)   r)   r*   upsample_nearest_exact1d
     r  scales_hscales_wc                 C   s   t | |||gS r7   r  r   r  r  r  r)   r)   r*   upsample_nearest2d  s   
r  c                 C   s   t | |||gddS r  r  r  r)   r)   r*   _upsample_nearest_exact2d%  s   r  scales_dc                 C   s   t | ||||gS r7   r  r   r  r  r  r  r)   r)   r*   upsample_nearest3d4  r  r  c                 C   s   t | ||||gddS r  r  r  r)   r)   r*   _upsample_nearest_exact3dB  s   r  r  c           	      C   sp   t | |||d}d d g| }t| |}|jdkr6t| }| jd }| jjdkr0|dk r0t	j
}|j|d}|S )Nr  r   r!   cudar0  )r  r   _unsafe_indexrT  rF   r   r   re  rK  rA   r5  r  )	r   r  r  r  spatial_indicesrp  r   r1  
n_channelsr)   r)   r*   r  T  s   


r  c                    sb   |r|rd n|rd n|rd nd t   dks!J t  fddtdt  D S )Nr  r   r   r"   r   c                    s    g | ]}t ||   qS r)   r  rA  
group_sizeparamsr)   r*   r4   z  s    z!gather_params.<locals>.<listcomp>)rY  rU   )r  
has_biaseshas_projectionsr)   r  r*   gather_paramso  s   r  c                 C   sh   |r!| d|  |d|  }}| d| d  |d| d  }}n| | || }}d\}}||||fS )Nr"   r!   NNr)   )r  hiddensr   bidirectional
cur_params
cur_hiddenbidir_paramsbidir_hiddenr)   r)   r*   params_hiddens  s   $r  c                 C   s2   ||ksJ | | d|||  | dd|S rq   )r%  r  )r  last_batch_size
batch_sizer  r)   r)   r*   update_hidden_for_packed  s   r  c              	   C   s4   ||kr| S ||k sJ t | |d||| fS rq   )rA   concatr  )r  r  r  
inp_hiddenr)   r)   r*    update_hidden_for_packed_reverse  s   r  c                 C   s$  |d }|d }|r|d nd }	|r|d nd }
g }g }|r"|d n|d }| dd|}t| t|}|r>|d d d }|D ]-} | jd }||krLn|rVt||||}nt||||}|| |||	||
}|}|| q@|ru|  n	|| |  t	|d}|st	|dn|}||fS )Nr   r!   r"   r   rT   )
r  rA   r8  rL  r   r  r  r%  reverser  )inphiddenr  r  	hidden_fnbatch_sizesr  	ih_weight	hh_weightih_biashh_biasstep_outputr  r  r  	split_inpr   r   
hidden_outr)   r)   r*   one_layer_rnn_data  s@   


r  c                        fdd}|S )Nc                    s    t ||||  S r7   r   linearr   r  r  r  r  r  nonlinearityr)   r*   rM     s   zrnn_cell.<locals>.innerr)   r  rM   r)   r  r*   rnn_cell  s   r  c                    r  )Nc                    s$   t | ||}  t ||||  S r7   r  r  r  r)   r*   rM     s   zrnn_cell_data.<locals>.innerr)   r  r)   r  r*   rnn_cell_data  s   r  c                 C   s   |d }|d }|r|d nd }|r|d nd }	t | ||}
|r&|
dn|
}
|d}g }|
D ]}|||||||	}|| q1|rH|  t|d}||dfS )Nr   r!   r"   r   )	r   r  fliprV   r%  r  rA   r  r  )r  r  r  r  r  r  r  r  r  r  precomputed_inputr  r  r   r   r)   r)   r*   one_layer_rnn  s   
r  c                 C   s   |d }|d }|r|d }|d }nt | }t | }|d d}	|d d}
g }d}|	d}d}d}d}d}|  } |	 }	|
 }
t jjj| |||||	|
|||||||||}|d |d |d }}}||	d|	dffS )Nr   r!   r"   r   F)
rA   r   r  rV   r  r  r   mkldnn_rnn_layerr]  r  )r  r  r  r  r  w0w1w2w3hxcxr  modehidden_size
num_layersr  rp  r  outputsr[   hycyr)   r)   r*   mkldnn_one_layer_lstm  sN   


r  c
                 C   s   |r|  ddn| } g }
t|D ]^}t||||\}}}}|r'||d k r'|nd}|	| |||\}}|
| |rI|	| |||dd\}}|
| |rXt||g| d } n|} |dkrn|rn||d k rntj| |dd} q|rw|  ddn| } | |
fS )Nr   r!   r   T)r  )r  )	transposerU   r  r%  rA   r  rQ   r  )r   r  r  r  r  r  r  r  rp  layer_fnfinal_hiddensr   r  r  r  r  fwd_inp
fwd_hiddenbwd_inp
bwd_hiddenr)   r)   r*   _rnn_helper#  s,   



r  c	                 C   R   | d}	t||d}t| |	|||||||ttttjd
\}
}|
t|dfS Nr   Fr  )	unbindr  r  r   r  r  rA   r   stackr   r  r  r  r  r  r  r  rp  r  r   r  r)   r)   r*   rnn_tanh_inputL     
r  c	                 C   r  r  )	r  r  r  r   r  r  rA   r]  r  r  r)   r)   r*   rnn_relu_inputk  r  r  c	                 C   T   | d}	t||d}t| |	||||||dtt|ttjd
\}
}|
t|dfS Nr   Fr  r  )	r  r  r  r   r  r  rA   r]  r  datar  r  r  r  r  r  r  r  r  r   r  r)   r)   r*   rnn_relu_data  &   
r  c	                 C   r   r  )	r  r  r  r   r  r  rA   r   r  r  r)   r)   r*   rnn_tanh_data  r  r  c                 C   s   t ||||  }|d|}|d  }	|d  }
|d  }|d  }|
| |	|  }||  }|d u r;|nt ||d }||fS )Nr   r   r!   r"   r   r   r  chunkr   r   )r  r  r  r  r  	hr_weight	chunk_dimgateschunked_gatesin_gateforget_gate	cell_gateout_gater  r  r)   r)   r*   	lstm_cell  s   r  c              
   C   s   |d }|d }|r|d nd }|r|d nd }t |dkr"|d nt |dkr,|d nd }	|d d}
|d d}t| ||}|rJ|dn|}g }|D ]} t| |
||||	dd\}
}||
 qP|rk|  t	|d}||

d|
dffS )Nr   r!   r"   r   r  r   r  )rY  rV   r   r  r  r  r%  r  rA   r  r  )r  r  r  r  r  r  r  r  r  r
  r  r  r  r  r   r)   r)   r*   one_layer_lstm  s$   *r  c              
   C   s
  |d }|d }|r|d nd }|r|d nd }	t |dkr"|d nt |dkr,|d nd }
g }g }|r8|d n|d }t| t|}|rM|d d d }|d }|d }|dd||dd|}}|D ]l} | jd }t| ||} ||k r||d||| |d||| f |dd||dd|}}||krt	||d||| fd}t	||d||| fd}t
| ||||	|
dd\}}|}|| qf|r|  ||f}n|||f |  t| \}}t|dt|df}t|d}||fS )	Nr   r!   r"   r   r  r   rT   r  )rY  rA   r8  rL  r  r   r   r  r%  r  r  r  r  r  )r  r  r  r  r  r  r  r  r  r  r
  r  r  r  r  orig_hxorig_cxr  r  r   r  hidden0hidden1r   r)   r)   r*   one_layer_lstm_data  s\   *

r  c                 C   s   dd }|| ||rt S tS )a*  Check whether we could use decompose lstm with mkldnn_rnn_layer.
    All the below conditions need to be met:
        * ``torch._C._get_mkldnn_enabled()`` returns ``True``.
        * All the input args are on CPU.
        * The dtypes of args are either torch.float or torch.bfloat16.
        * Inference.
        * ``has_projections`` returns ``False``.

    Args:
        * input: the input sequence to LSTM
        * hx: a tuple of the input hidden state and cell state ``(h_0, c_0)`` to LSTM
        * params: the weight and bias tensors of LSTM
    c           	      S   s   t j sdS | gt| tt| }dd |D }t|dkr$dS | }|t dkr1dS dd |D }|D ]}|t j	t j
fvrG dS q:| jrMdS |d d|d dk}|r_dS d	S )
NFc                 S      h | ]}|j qS r)   rd  r1   tr)   r)   r*   	<setcomp>M      zEselect_one_layer_lstm_function.<locals>.use_mkldnn.<locals>.<setcomp>r!   rF  c                 S   r  r)   r   r  r)   r)   r*   r  U  r  r   r"   T)rA   r  _get_mkldnn_enabledrL  r   from_iterablerY  popre  r   bfloat16requires_gradr  )	r   r  r  r!  devicesre  dtypesr   r  r)   r)   r*   
use_mkldnnH  s(   
z2select_one_layer_lstm_function.<locals>.use_mkldnn)r  r  )r   r  r  r&  r)   r)   r*   select_one_layer_lstm_function9  s   r'  c	                 C   s   t |dks
J dt|||d d|d dk}tt|d |d }	t| ||}
t| |	||||||||

\}}tt| }|t|d dt|d dfS )Nr"   lstm expects two hidden statesr   r!   )	rY  r  r  rL  r  r'  r  rA   r  )r   r  r  r  r  r  r  r  rp  r  r  r   r  r)   r)   r*   	lstm_implk  s$   $"r)  c	                 C   s   t |dks
J dt|||d d|d dk}tt|d |d }	t| |	||||||dtt|d
\}
}tt| }|
t	|d dt	|d dfS )Nr"   r(  r   r!   F)r  )
rY  r  r  rL  r  r  r   r  rA   r  r  r)   r)   r*   lstm_data_impl  s"   $
"r*  c                 C   sr   |  dd}t||| dd}|d |d   }|d |d   }	|d |d |   }
||
 |	 |
 S )Nr   r!   r"   r   )r	  r   r  r   r   r  r  r  r  r  r  chunked_igateschunked_hgates
reset_gate
input_gatenew_gater)   r)   r*   gru_cell  s   r1  c                 C   s|   t | ||dd}t |||dd}|d |d   }|d |d   }	|d |d |   }
||
 |	 |
 S )Nr   r!   r   r"   r  r+  r)   r)   r*   gru_cell_data  s   r2  c	                 C   sJ   t ||d}t| |d||||||dtt|td
\}	}
|	t|
dfS )NFr   r  )r  r  r  r   r  r2  rA   r  )r  r  r  r  r  r  r  r  r  r   r  r)   r)   r*   gru_impl_data  s   r3  c	                 C   sH   t ||d}t| |d|||||||tttd
\}	}
|	t|
dfS )NFr   r  )r  r  r  r   r  r1  rA   r  )r   r  r  r  r  r  r  r  rp  r   r  r)   r)   r*   gru_impl  s   
r4  c                 C   :   t |  ||}t|d}t|d}tjj| ||||S Nr   r!   )r  r  r  rA   r  r   _upsample_bilinear2d_aar   r  align_cornersr  r  scale_hscale_wr)   r)   r*   upsample_bilinear2d_aa_vec     


r<  c                 C   r5  r6  )r  r  r  rA   r  r   _upsample_bicubic2d_aar8  r)   r)   r*   upsample_bicubic2d_aa_vec  r=  r?  c                 C   s4   t |  ||}|r|nd gt| }t| |||S r7   )r  r  rY  _upsample_linear)r   r  r9  r  r  r  r)   r)   r*   _upsample_linear_vec  s   	rA  r9  c                 C   s   t | |||gS r7   r@  )r   r  r9  r  r)   r)   r*   upsample_linear1d"  s   rC  c                 C   s   t | ||||gS r7   rB  )r   r  r9  r  r  r)   r)   r*   upsample_bilinear2d-  s   rD  c                 C   s   t | |||||gS r7   rB  )r   r  r9  r  r  r  r)   r)   r*   upsample_trilinear3d<  s   rE  c                 C   s@   |r|dkr| d |d  S dS |d ur|dkrd| S | | S )Nr!   rf   r   r)   )r  r   r9  rm   r)   r)   r*   _compute_scaleM  s    rF  c                 C   s   |r| | S | |d  d S Nr   r)   )rm   	dst_indexr9  r)   r)   r*   _compute_source_indexT  s   rI  weightsweights_precisionc                 C   sB   t dd t| |D d|d >  }||? }t|ddtjS )Nc                 s   s,    | ]\}}| tj| tj V  qd S r7   )r9   rA   r  )r1   r5  r  r)   r)   r*   r  ^  s    
z%_sum_tensors_uint8.<locals>.<genexpr>r!   r      )_sum_tensorsr  rA   r   r9   r  )rc  rJ  rK  r~  r)   r)   r*   _sum_tensors_uint8[  s   
rN  c                 C   sJ   t |  }d}t j||jd}d|d|d >   }|dk}||  S )N   rd  r   r!   i   )rA   r  r   ri  re  r   )rJ  
max_weightmax_weight_precision
precisionsvaluesrr  r)   r)   r*   _compute_weight_precisione  s   rT  c                    s  j d }j dd  }t|tjtjjd\}fddfddtt|||D }tt| \}g }	t	ddgg  D ]# d d g fd	dt
D  }
t|
}t|}|	| qGtt
D ]'}|| |  d
dfddt|	d d d |	dd d D }	qqt|	dksJ |	d }t}jjdkr|dk rtj}t|tjsJ |j|d} s| }|S )Nr!   r"   r	  c           	         s   t | | |}tj|jdjd}t|| jdd}|j|jd gdg| R  }|tj	}|d j| d d}|||fS )Nrd  r   r   r   r   r!   r   )
rF  rA   ri  re  r9   rI  r   r  r   r  )	inp_sizer   r  nsqueezescale_factorr   x_f32r2   xp1)r9  r   r   r)   r*   
get_values  s   
z$_upsample_linear.<locals>.get_valuesc                    s,   g | ]\}\}}} |||d  | qS r  r)   )r1   r   rU  r   r  )rZ  n_dimsr)   r*   r4     s    z$_upsample_linear.<locals>.<listcomp>r   c                    s(   g | ]} | d kr| n| qS r  r)   )r1   k)r  xp1sxsr)   r*   r4        ( r   rf   c                    s$   g | ]\}}|t ||   qS r)   )rA   r   )r1   v1v2)xscaler)   r*   r4     s    r     r0  )r   rY  rF   rG   r  INT_TO_FLOATr}  r  rL  r	   rU   r   r  r   r%  reversedr   r9   r   re  rK  rA   r5  r/   r   r  r  round)r   r  r9  r  r  	inp_sizesrW   rS  xs_f32vsrq  vr   r   r1  r)   )	r  r9  r   rZ  r   r[  r]  r^  rb  r*   r@  n  sF   


"


r@  r  r  c                 C   s   | j |j kS r7   r  )r  r  r)   r)   r*   is_same_size  r|   rk  c                 G   rv   r7   )r   rm  )r2   r   rH   r)   r)   r*   _reshape_alias  s   rl  c                 C   rv   r7   )r   rt  )r2   rp  r)   r)   r*   r    r|   r  c                 C   s   t | |||S r7   )r   rj  )r2   rp  rz   r  r)   r)   r*   r    r=  r  c                 C   s   |D ]}|d urt |jt jt jfv dd  qt |jt jkdd  ddlm} ||  dkr@t j	
| |}| |j|S tt|D ]}|| }|d ur^|jd| |d d||< qFt| || |S )Nc                   S   r  Nz3tensors used as indices must be long or int tensorsr)   r)   r)   r)   r*   r     r  z&_unsafe_masked_index.<locals>.<lambda>c                   S   r  Nz*tensors used as masks must be bool tensorsr)   r)   r)   r)   r*   r     r  r   r  r!   r[  )rA   r   r   r  ra  rk  rJ  r  r   _meta_registrationsmeta_index_Tensorr$  r   rU   rY  r   r  r   r  r  )r2   rr  rp  fillrt  r  meta_resultr   r)   r)   r*   rn    s*   
rn  c                 C   s   |D ]}|d urt |jt jt jfv dd  qt |jt jkdd  |  dkr.|  S tt	|D ]}|| }|d urP|j
| | | |d d||< q4|| d}tj| ||ddS )	Nc                   S   r  rm  r)   r)   r)   r)   r*   r     r  z5_unsafe_masked_index_put_accumulate.<locals>.<lambda>c                   S   r  rn  r)   r)   r)   r)   r*   r     r  r   r!   r[  Tr  )rA   r   r   r  ra  rk  r   rh  rU   rY  r   r  r  r   r  )r2   rr  rp  rS  rt  r   masked_valuer)   r)   r*   #_unsafe_masked_index_put_accumulate  s(   
$rt  c                 C   sV  |   }d}|dk rd}|d ur,|dkr&dg| }|jd ||< ||}n|}| | } t||k|d}	|	|}
t| ||
| }t||k|d}|tj	j
krb|dkrb| dd}||fS |d ur|| j}t|||
|}t||k|d}| }n	||k | }|tjj
kr| }||fS |tjj
kr| | }||fS )Nr!   r"   r   r)   r   )rQ   r   rm  rA   rh   rV   gatherr  r    r&   rz   r$  rg  r   r9   r(   r'   )ry   r   r   r   r  r[  r  r   wr	  safe_target_r   r  wsumr)   r)   r*   _nll_loss_forward  sB   


ry  c                 C   s   |   dkr|   dksJ d|  dksJ d|   dko%|  dk}|s?| jd |jd ks?J d| j d|j d| jd	 }|d u s_|  dkrT| |ks_J d
| d|j t| ||||S )Nr   r"   r  r!   r  r  r  r  rT   z/weight tensor should be defined either for all z7 classes or no classes but got weight tensor of shape: )rQ   r   r   ry  )ry   r   r   r   r  r  	n_classesr)   r)   r*   nll_loss_forward9  s    	
"r{  c                 C   s   t | ||||S r7   )ry  )ry   r   r   r   r  r)   r)   r*   nll_loss2d_forwardV  s   	r|  Ac                 C   s    |d |  |d  |  |  d S )Nr"   r   r!   r)   r2   r}  r)   r)   r*   _upsample_cubic_convolution1d  r  r  c                 C   s(   ||  d|  |  d|  |  d|  S )Nr     r   r)   r~  r)   r)   r*   _upsample_cubic_convolution2h  s   (r  r  c           
      C   s   d}| j t dkrDtj| d|  gdd}tj| d d|  gdd}t||}t||}tj|dd\}}tj|dd\}}	|||	|fS t| d |t| |td|  |td|  |fS )Ng      rF  rf   r   r}   r   )re  rA   r  r  r  r  )
r  r}  tt1tt2w03w12r  r  r  r  r)   r)   r*    _upsample_get_cubic_coefficientsl  s   

r  coeffstsc                 C   s    t |}tdd t| |D S )Nc                 s       | ]	\}}|| V  qd S r7   r)   r1   rw  rx  r)   r)   r*   r    r  z+_upsample_cubic_interp1d.<locals>.<genexpr>)r  rM  r  )r  r  coeffs2r)   r)   r*   _upsample_cubic_interp1d  s   r  c                 C   s   t tj| S r7   )r   rA   r  )r  r)   r)   r*   rM    s   rM  	num_stepsc                 C   sB   | dkrt jd||dS |s| d |  nd}t j| || ||dS )Nr!   r   rf  )stepsre  r   )rA   r'  linspace)r  r9  r   re  r  r)   r)   r*   _linspace_from_neg_one  s   r  thetahrv  c           	      C   s   | j }| j}t||||d|d}t|||||dd}tjd||d}tjjj|dddd}tjjj|dddd}tjjj|d	ddd}|| | S )
Nr!   )r!   r!   r!   r  )r   r"   constantr   r  r  rz   r!   r!   )r"   r   	r   re  r  rm  rA   rj  r-  r  r  )	r  r  rv  r9  r   re  grid_xgrid_ygrid_oner)   r)   r*   _make_base_grid_4d  s   r  r  c                 C   s   | j }| j}t||||dd|d}t||||d|dd}t|||||ddd}	tjd||d}
tjjj|dddd}tjjj|dddd}tjjj|	d	ddd}	tjjj|
d
ddd}
|| |	 |
 S )Nr!   )r!   r!   r!   r!   r  )r   r   r  r   r  r  r"   r!   )r   r   r  )r  r  r  rv  r9  r   re  r  r  grid_zr  r)   r)   r*   _make_base_grid_5d  s   r  c           	      C   sL   |\}}}}t | |||d}|ddd| jd d}||||dS )Nr9  rT   r   r!   r2  r"   )r  rm  r7  rV   r   )	r  r  r9  r  rW   r  rv  	base_gridgridr)   r)   r*   _affine_grid_generator_4d  s    r  c           
      C   sR   |\}}}}}t | ||||d}|ddd| jd d}	|	||||dS )Nr  rT   r   r!   r2  r   )r  rm  r7  rV   r   )
r  r  r9  r  rW   r  r  rv  r  r  r)   r)   r*   _affine_grid_generator_5d  s    r  c                 C   s@   t t|dv dd  t|dkrt| ||dS t| ||dS )NrT  c                   S   r  )NzCaffine_grid_generator needs 4d (spatial) or 5d (volumetric) inputs.r)   r)   r)   r)   r*   r     r  z'affine_grid_generator.<locals>.<lambda>r   r  )rA   r   rY  r  r  )r  r  r9  r)   r)   r*   affine_grid_generator  s   
r  r  interpolation_modepadding_mode_expand_gridc                    sJ  t dv fdd t dv fdd dtdtdtffdd	dtd
tdtdtfdddtdtdtffdddtdtdtffdd}j\ |j\}}|dkscJ ru|d| d}dtdtdtffddt jjddddt j jdd dddtdtdtdt	f fdddtdtdtffdd
|d  }	|d! }
d"kr1||	}||
}|
 |
 d }}d }}||}}|| ||  }|| ||  }|| ||  }| |  }t
fd#d$|f|||f|||f|||ffD S dkrN||	}||
}| }| }
||dS |	}|
}|
 |
 | | }sud|d}dtdtdtf
fd%d&d'tdtffd(d)	t	fd*d$td+D }t||S ),N)r   r!   r"   c                      r  )NzInvalid interpolation mode r)   r)   )r  r)   r*   r     r  z"_grid_sampler_2d.<locals>.<lambda>c                      r  )NzInvalid padding mode r)   r)   )r  r)   r*   r     r  coordsr  rR   c                    s0    r|d d n|d }|d d }| | | S rG  r)   )r  r  r   ofsr  r)   r*   unnormalize  s   z%_grid_sampler_2d.<locals>.unnormalize	twice_low
twice_highc                 S   sv   ||kr	t | S |d }|| d }| |  }t ||}||  jt jd}t |d@ dk|| || | S )Nr"   r   r!   r   )rA   r   r   fmodfloorr9   int8rh   )r  r  r  
coords_mincoords_spancoords2extraflipsr)   r)   r*   reflect_coordinates  s   
z-_grid_sampler_2d.<locals>.reflect_coordinatesc                    sf   dkr| S dkrt | d|d S  r | dd|d  }n
| dd| d }t |d|d S )Nr   r!   r"   rT   r   )r  r  coords_reflected)r9  r  r  r)   r*   compute_coordinates  s   z-_grid_sampler_2d.<locals>.compute_coordinatesc                    s   | |} ||S r7   r)   )r  r  	coords_un)r  r  r)   r*   compute_source_index  s   

z._grid_sampler_2d.<locals>.compute_source_indexr"   r!   r^  ysc                    s,   t d| kt | k t d|k| k S rq   rA   rl  )r^  r  )iHiWr)   r*   in_bounds_cond"  s   $z(_grid_sampler_2d.<locals>.in_bounds_condrd  wsc                    sN   | |r	nd t  fdd| jtjd|jtjd|fD S )Nr!   c                 3   s*    | ]}t |d  V  qdS r  )rA   rh   rm  r  )rd  r  r  oHoWr)   r*   r  1  s
    
z1_grid_sampler_2d.<locals>.clip.<locals>.<genexpr>r   )r  r9   rA   r  )r^  r  r  )re  rd  r  r  r  r  )r  r  r*   clip*  s
   
z_grid_sampler_2d.<locals>.clipixiyc                    s&   | ||\}}} ||f | S r7   r)   )r  r  rv  idx_xidx_yw_)C_idxN_idxr  r  r)   r*   get_summand6  s   z%_grid_sampler_2d.<locals>.get_summand).r   ).r!   r   c                 3   s"    | ]\}}} |||V  qd S r7   r)   )r1   r  r  rv  )r  r)   r*   r  L  s
    

z#_grid_sampler_2d.<locals>.<genexpr>c                    s     | } |}||dS r\   r)   )r  r  r2   r[   )r  r  r  r  r)   r*   get_value_boundedk  s   

z+_grid_sampler_2d.<locals>.get_value_boundedr  c                    sF   | d  } d | | d | d |f}t |S )Nr!   r"   )r  )r  iy_ofscs)r  ix_nwiy_nwtxr)   r*   	get_coeffp  s   
z#_grid_sampler_2d.<locals>.get_coeffc                 3       | ]} |V  qd S r7   r)   )r1   r  )r  r)   r*   r  z  r  r   )rA   r   r   ra  r   rm  rg  ri  re  r   r  rM  rf  rV   r  rU   r  )r  r  r  r  r9  r  r  rW   twor2   r[   r  r  ix_neiy_neix_swiy_swix_seiy_sew_nww_new_sww_se
ix_nearest
iy_nearesttyr  r)   )re  r  rd  r  r  r  r9  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r*   _grid_sampler_2d  sx   
 ( 




	





 

r  c                 C   s   t | ||||dS )N)r  r  r  r9  )r  )r  r  r  r  r9  r)   r)   r*   grid_sampler_2d~  s   
r  c                    s`   t   dko dk fdd t  ddk fdd   jddS )Nr"   r!   c                      s   d    d   S )Nzmatrix @ vector expected, got rj  r}   r)   ry   r_  r)   r*   r     rk  zmv.<locals>.<lambda>r   c                      s*   d  d d  d d d dS )Nzsize mismatch, got input (r   r2   r!   z), vec (r  rl  r)   r  r)   r*   r     s   * r}   )rA   r   rQ   r  r   r  r)   r  r*   r`    s   r`  c                 C   sd   |d ur|d | d }d| |  |t |   }nd| |  t |  }|d ur-|| }t||S r\   )r   
logsigmoidr   )ry   r   r   
pos_weightr   
log_weightr   r)   r)   r*    binary_cross_entropy_with_logits  s   
r  tensor1tensor2is_outc           	         s   | j |j kr
| |fn|| f\}}ddlm  |j dkr |j dks"dS |jr)|s)dS | j dkr0dS  | dkr:dS |j}| }dg}t|dd  D ]}|||d   qLt	 fd	d
t
|tt||D S )Nr   r  r   r"   FTr!   rT   c                 3   s.    | ]\}}} |d kp ||kV  qdS r  r)   )r1   r   r   r  r  r)   r*   r    s
    
zshould_fold.<locals>.<genexpr>)rT  rJ  r  r#  r   r   rM  re  r%  r   r  rL  )	r  r  r  t1t2t1_shape	t1_strideexpected_strider  r)   r  r*   should_fold  s(    

r  )pass_is_out)r  c                C   s  ddl m}m} |  }| }|dkr|dksJ |dkr(|dkr(t| |S |dkr6|dkr6t| |S |dkrL|dkrLttt	| d|dS |dkrZ|dkrZt| |S t
| ||r||k}|ri|jn| }|so|n	|dkrw|  n| }	|j}
t|
d d }ttj|}|	 dk}|r||	jd  |||
d }|rtjj||	|}|r|j S |S tjj||	|S |dkr|dkr|dkr| dnd}| d}| jd d }|dkr|dn|d}|dkr|dnd}g }t|d D ]}||| q|dkrP|dkrP||d |d krP||d dkr;| jr;t| d|S ||d dkrP|jrPt| |dS tt||}|||g }t|}| ||||}|dk}|r||g }||||	d}n|||g }|||||}|}|dkr|| |dkr|| |r||d |S || |S t!dd	d
  d S )Nr   )r  guard_or_truer!   r"   rT   r2  r   Fc                   S   r  )Nz/both arguments to matmul need to be at least 1Dr)   r)   r)   r)   r*   r   J  r  zmatmul.<locals>.<lambda>)"rJ  r  r  rQ   rA   dotr`  r  rX  rV   r  r7  r  r   rL  r   r>  r   r%  r  r  r   _unsafe_viewr  r  rU   r#  r6  broadcast_shapesr  rg  bmmrm  r   )r  r  r  r  r  dim_tensor1dim_tensor2r  r  r  sizes_1output_shapefolded_dim1t2_is_matrix	t1_foldedr~  r  m1batch_tensor1m2r,  batch_tensor2r   expand_batch_portiontensor1_expand_sizeexpand_batch_producttensor1_expanded
vector_rhstensor2_expand_sizetensor2_expandedr)   r)   r*   r6    s   	










r6  r:  r;  c                    s  j \}}t|d ||}t|d ||}tjtjjd\}}tj|d jdj	|d}	tj|d jdj	|d}
t
||
|}t
||	|}|d}| }| }|| dd}|| dd}|	tj}|	tj}|d ||d |d	 f}|d ||d |d	 ft|t|}d
\jtjkrtt|fddD fdd|D }fddfdd t fdd|D }jtjkrd usJ t||}ntdd t||D }t}|j|d}|S )Nr   r!   r	  rd  r   rT   r   rf   r"   r  c                    .   g | ]}|d  >  t |d  t jqS r!   r   rA   r   r9   int16r1   rv  )weights_precision_xr)   r*   r4          z.upsample_bicubic2d_default.<locals>.<listcomp>c                    r  r	  r
  r  )weights_precision_yr)   r*   r4     r  c                    s<   t | d d }t |dd }td d ||g}|S r6  )rA   r   r   r  )r  r^  y_idxx_idxrj  )in_hin_wr   r)   r*   load_bounded  s   z0upsample_bicubic2d_default.<locals>.load_boundedc                    sT   t  fddD }jtjkrd usJ t|S tdd t|D S )Nc                 3   s    | ]} |V  qd S r7   r)   )r1   x_ofs)r  r[   r)   r*   r    rm  zCupsample_bicubic2d_default.<locals>.get_x_interp.<locals>.<genexpr>c                 s   r  r7   r)   r  r)   r)   r*   r    r  )r  r   rA   r  rN  rM  r  )r[   src_x)r   ixs_ofsr  r  	weights_x)r[   r*   get_x_interp  s
   z0upsample_bicubic2d_default.<locals>.get_x_interpc                 3   r  r7   r)   )r1   y_ofs)r  r)   r*   r    r  z-upsample_bicubic2d_default.<locals>.<genexpr>c                 s   r  r7   r)   r  r)   r)   r*   r    r  r0  )r   rF  rF   rG   r  rd  rA   ri  re  r9   rI  rV   r  r   r  r  r   r  rT  r  rN  rM  r  r   r  )r   r  r9  r:  r;  rW   h_scale_factorw_scale_factorr   r   r9  x_floaty_floatr2   r[   yscalerb  iys_ofs	weights_ysrc_yr   r1  r)   )	r  r  r  r   r  r  r  r  r  r*   upsample_bicubic2d_defaultM  sR   




r#  c                 C   s   t t|t| dkdd  |d u r2|d usJ ttttf tdd t| jdd  |D }|r6|nd\}}t| ||||S )Nr!   c                   S   r  )Nz:Must specify exactly one of output_size and scale_factors.r)   r)   r)   r)   r*   r     r  z(upsample_bicubic2d_vec.<locals>.<lambda>c                 s   s$    | ]\}}t t|| V  qd S r7   )r   r   )r1   rv  rm   r)   r)   r*   r    s
    
z)upsample_bicubic2d_vec.<locals>.<genexpr>r"   r  )	rA   r   rk  r   r  ra  r  r   r#  )r  r  r9  r  r:  r;  r)   r)   r*   upsample_bicubic2d_vec  s   
r$  c                        fdd}t  ||S )Nc                    s4   t j|  ||  jd}|d |d |    S )Nrd  r!   )rA   ri  re  r   r   middler   dim_idxr  r)   r*   rq    s   z_reflection_pad.<locals>.idx_reflection_or_replication_padr  r  rq  r)   r)  r*   _reflection_pad     r-  c                    r%  )Nc                    s*   t j|  ||  jd}t |d|d S )Nrd  r   r!   )rA   ri  re  r   r&  r)  r)   r*   rq    s   z_replication_pad.<locals>.idxr*  r,  r)   r)  r*   _replication_pad  r.  r/  idx_fnc                    s   t d  t|   d  d fv  fdd | j  d  }|    } fddt D } fddt D }| }t D ]}d g|  }	||| || || |	|| < t||	}qFt	|}
|j
|
d}|S )	Nr"   r!   c                      s    d  d d  d d  dS )Nreflection_padzd requires r!   zD or r"   zD inputr)   r)   r}   r)   r*   r          z0_reflection_or_replication_pad.<locals>.<lambda>c                        g | ]}d  d |   qS r  r)   rA  rQ   r  r)   r*   r4     r2  z2_reflection_or_replication_pad.<locals>.<listcomp>c                    $   g | ]}d  d |  d  qS r  r)   rA  r4  r)   r*   r4     rl  r0  )rY  rA   r   rQ   r   rU   r   r  rF   r   r  )r  r  r0  	inp_shapenc_dimpadding_leftpadding_rightr   r   rq  r1  r)   r4  r*   r+    s"   
 
r+  c                    s\  t d dd |j d  D fddtD fddtD g }t|jD ]}dg|j }d||< |tj|j| |jd| q2|d    | d  
d	d
 
fddtD 
fddtD }
fddtD }fddtD 	t	
tj	fddtD }t|  d}	 fdd}
tjdd tD  D ]f}|tdg krqg }g }tD ]K}|| dkr| }	| }n0|| dkr|| }
| d| f}n|| dkr|| }
| | |  | d f}|| || q|
|	||}	q|	S )Nr"   c                 S   s   g | ]}|d  qS r  r)   )r1   r  r)   r)   r*   r4     rJ  z,_reflection_pad_backward.<locals>.<listcomp>c                    r3  r  r)   rA  r4  r)   r*   r4     r2  c                    r5  r  r)   rA  r4  r)   r*   r4     rl  r!   rT   rd  c                 S   s   | \}}}t ||k||kS r7   r  )index_ranger   lbubr)   r)   r*   index_range_condition  s   
z7_reflection_pad_backward.<locals>.index_range_conditionc                    s   g | ]
}|  |  qS r)   r)   rA  r8  xyzr)   r*   r4   !  r  c                    s   g | ]
} | |  qS r)   r)   rA  r>  r)   r*   r4   "  r  c                    s(   g | ]}d  |  |  |  qS r  r)   rA  )dhwr8  r?  r)   r*   r4   #  r_  c                    s.   g | ]} | d | |  |  fqS r  r)   rA  )centerr@  r8  r9  r)   r*   r4   '  s    "c                    s   g | ]} | qS r)   r)   rA  )r=  range_cr)   r*   r4   +  rk  r   c                    st   t D ]}|| d || d k }t|tr|r|   S qttjfdd|D }t| | d}| | S )Nr"   r!   c                    s   g | ]} |qS r)   r)   )r1   r:  )r=  r)   r*   r4   9  rJ  z@_reflection_pad_backward.<locals>.accumulate.<locals>.<listcomp>r   )rU   r/   rk  rN   r   r   rl  rn  )r   r   index_rangesr   upper_less_than_lowerr  g)r  rQ   rk   r=  r)   r*   r  /  s   z,_reflection_pad_backward.<locals>.accumulatec                 S   s   g | ]}g d qS ))rT   r   r!   r)   r  r)   r)   r*   r4   >  rJ  r   )rY  r   rU   rT  r%  rA   ri  re  rm  rN   r   r   rl  rn  	itertoolsr	   r  )rk   r2   r  rp  r   
view_shapeleft_reflectright_reflectr  r   r  areaoutsrC  r   r:  r)   )r  rA  r@  rQ   rk   r=  r  r8  r9  rB  r?  r*   _reflection_pad_backward  sT   $
"
rL  r   r   r   c                C   s(   t j| ||d}t j| ||d}||fS )Nr   )rA   aminr  )ry   rQ   r   rM  r  r)   r)   r*   aminmaxY  s   rN  r   c                C   s"   t jtt| d| |||dS )Nr   r   )r   r   rA   rh   isnan)ry   rQ   r   r   r)   r)   r*   nansumc  s   "rP  r   r  re  r  r  c             	   C   s   t jjd| d||||dS )Nr   r!   rQ  r   ri  
start_step)rC  r   r  re  r  r)   r)   r*   arange_defaulti     
rT  c             	   C   s   t jj| |d||||dS )Nr!   rQ  rR  )rB  rC  r   r  re  r  r)   r)   r*   arange_startx  rU  rV  c                  O   s   ddl m} || i |S )Nr   )out_dtype_dense)!torch._higher_order_ops.out_dtyperW  )rH   rI   rW  r)   r)   r*   out_dtype_decomp  s   rY  marginc           	         s  t t jd jd  t |dkp|dkdd  t jdko, dkfdd t jdko? kfdd d urdt t jdko\  k fdd dt jdd	}||  }|	d}|dkr|n|| }d ur|  }t j
 jd
}t |k|d}|tjjkr| S |tjjkr| |jd  S |jddS )Nr   r!   r"   c                   S   r  )Nz only p == 1 and p == 2 supportedr)   r)   r)   r)   r*   r     r  z#multi_margin_loss.<locals>.<lambda>c                      rD  NzMExpected non-empty vector or matrix with optional 0-dim batch size, but got: r  r)   rU  r)   r*   r     rE  c                         d  dj  S )Nz#inconsistent target size, expected r  r  r)   )nframer   r)   r*   r     r  c                      r\  )Nz#inconsistent weight size, expected r  r  r)   )rQ   r   r)   r*   r     r  rn  rd  r}   )rA   
atleast_2d
atleast_1dr   r   rT  r   rV   ru  r8  ri  re  rh   r    r'   rz   r   r(   r   )	r   r   r,  rZ  r   r   uri   rq  r)   )rQ   r   r]  r   r   r*   multi_margin_loss  sB   







ra  	is_targetc                    s  | j  |j t| } t|}| j d }tt dko |dk fdd ttdko2 k fdd tj||jd}|dk}tjt|||dd	d
}||k }t||d}tj	| d|d}	t||d}
tj
||
jddkdd}d|	jjdd |  }|d}|| }t|d|}|tjjkr|jdd }n|tjjkr| }n|jdd}|| j}||fS )Nr!   r"   r   c                      r  r[  r)   r)   )orig_input_shaper)   r*   r     r  z0multilabel_margin_loss_forward.<locals>.<lambda>c                      s   d d  S )Nzinconsistent target size: z for input of size: r)   r)   rc  orig_target_shaper)   r*   r     r   rd  rT   Tr   rn  r}   rf   )r   rT   )r   rA   r^  r   rY  ri  re  rM  rh   ru  anyrV   Tr8  r    r'   rz   r   r   r(   r9   r   r  )r   r   r   rQ   rq  is_endend_idxtarget_masktidx0r`  tidx1rb  ri   r)   rd  r*   multilabel_margin_loss_forward  s@   





rm  )	attn_maskrm   querykey	dropout_p	is_causalrn  c          	         s   t t fdd t  dko  dko  dkfdd t  dk fdd t jd jd koJjd jd kdd  tjj| |d |d	d	kd
	\}}|	ddd	dj
t jd	d	ddd}||fS )Nc                      rD  )Nz-query must be FP32, FP64, BF16, FP16 but got r   r)   )ro  r)   r*   r   	  rE  z<scaled_dot_product_flash_attention_for_cpu.<locals>.<lambda>r   c                      s"   d   d    d   S )Nz,q, k, v must be a 4 dimensional tensor, got rj  r}   r)   )rp  ro  rz   r)   r*   r     s   " r   c                      r  )Nz&dropout probability must be zero, got r)   r)   )rq  r)   r*   r     r  r   c                   S   r  )Nz&q, k, v should have the same head sizer)   r)   r)   r)   r*   r     r  r!   )rn  rq  rr  dropout_maskrm   
enable_gqar"   r   r0  )rA   r   r  rQ   r   r   "_scaled_dot_product_attention_mathr]  r  r  r  r5  )	ro  rp  rz   rq  rr  rn  rm   r~  attnr)   )rq  rp  ro  rz   r*   *scaled_dot_product_flash_attention_for_cpu  s@   
"&
#rw  c                    s   t |  fdd}|S )Nc                     s    | i |}| d  |S rq   )r  )rH   rI   r   outplace_opr)   r*   
inplace_opB  s   z$register_inplace.<locals>.inplace_opr   )aten_opry  rz  r)   rx  r*   register_inplaceA  s   r|  c                 C   sx   |   s|  st|}t|}t||}t|tjr |dkr$|| }|dkr*|S t|tjr4|dkr8| | } | | S )Nr!   r   )r  rW  ra  rA   r  r/   numbersNumber)ry   batch1batch2rc   rl   r   r)   r)   r*   baddbmmJ  s   r  c                 C   s   t j| |ddS )Nr  r  r  )ry   r+  r)   r)   r*   floor_divide\  s   r  c                 C   s   t tj| jdS r\   )rN   r   r>  r   r   )r  r)   r)   r*   	sym_numelb  r  r  r   r   c                C   s.   |d u rt jj| g |dS t jj| g ||dS )Nr   r  )r   r   dim_IntListIntList_out)ry   r   r   r)   r)   r*   sum_defaultg  s   r  c                 C   sB   t | tjs| S |d u rtj| tt|  S tj| |gS r7   )	r/   rA   r   r   r  dimsrL  rU   rQ   )ry   rQ   r)   r)   r*   squeeze_defaultt  s
   r  c                    s`   t  fddtt| jD }|jtjkrtjnd }| jd|d|d}| ||	|j  |fS )Nc                 3   s    | ]	}| kr|V  qd S r7   r)   rA  r}   r)   r*   r    r  z)_weight_norm_interface.<locals>.<genexpr>r"   T)r   r   )
r  rU   rY  r   r   rA   r"  r   r   r9   )rj  rE  rQ   keep_dim
norm_dtyper   r)   r}   r*   _weight_norm_interface  s    r  assume_uniqueinvertc                C   s|   t | tjstj| |jd} t |tjs"|rt| |S t| |S | dt|  d k r6t	| ||dS t
| |||dS )Nrd  g      $@g(\?r  r  )r/   rA   r   r'  re  ner   r   r3  isin_defaultisin_sorting)elementstest_elementsr  r  r)   r)   r*   isin  s   r  )r  c                C   sP   |d u rt j|  t j| jd}nt j|  |t j| jd}|| k | j}|S )Nr  )r  r   re  )rA   randr  r   re  r9   r   )ry   r  raw_pr,  r)   r)   r*   	bernoulli  s   r  r  c                C   sl   |   dkrtj| tjdS | jd|j  }| |}ttd|j d d}||kj	|d}|r4| S |S )Nr   r   r  rT   r!   r}   )
r   rA   
empty_likerk  r   rT  rm  r  rU   rf  )r  r  r  expanded_elem_shaper2   rQ   r  r)   r)   r*   r    s   
r  c                C   s   |   }|  }|rIt||g}tj|dd\}}|dd  |d d k}	t|	ddgd}	|r5|	 }	t|	}
|
d||	}
|
d|   S t|\}}t	||}t
|| k |d}|| |k}|rm| n|}|| jS )NT)stabler!   rT   r   F)r  rA   r  sortr#  logical_notr  r  r   searchsortedrh   r  r   )r  r  r  r  elements_flattest_elements_flatall_elementssorted_elementssorted_orderduplicate_maskrr  sorted_test_elementsrW   rq  test_idxcmpr)   r)   r*   r    s$   
r  c                 C   s   |  d}|| S rS   )r  )ry   rt  	flattenedr)   r)   r*   take  s   
r  c                 C   s2   |d u rt j}|t jkrt|}tj| |j|dS r  )rA   r5  preserve_formatr   r   resizer   )ry   r+  r1  r)   r)   r*   	resize_as  s
   
r  )FF)r   r7   r  )r   NNr!   )rT   FFr  r  r  )r!   r!   F)Fr   )r   rf   N)r   r!   N)Fr  )NNN)r   r   FT)r   r   Fr  )r   F(  rN   rF  r}  r>  rN  collections.abcr   r   
contextlibr   enumr   r   r   r   r	   typingr
   r   r   r   rA   torch._meta_registrationstorch._primsr  r  torch._prims_common_prims_commonrF   torch.nn.functionalr-  r  r   r   r   r   torch._decompr   rX  r   r   r   r   r   r   torch._prims_common.wrappersr   r   r   r   torch.utilsr   rD   torch.utils._pytreer   r  DispatchKeyr   rL  str__annotations___opsr  r   r    r  rk  rP   r  compute_only_pw_cast_for_opmathpw_cast_for_opmath"pw_cast_for_opmath_non_tensor_argsrd  pw_cast_for_int_to_realra  rX   r`   rb   rj   r   ru   rq  r  r{   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r   r   r   r   r   r   r'   rz   r   _safe_softmaxr   r   r   r]  rY   r   r   r   r  r  r  r  r!  r&  r(  r)  r*  r-  r@  rG  slicerZ  rb  rF  rv  r{  r}  r  r  r  r  r  r  r  r  r  py_implCompositeImplicitAutogradAutogradr  r  r  r  r  r  r+  r-  r/  r2  r3  r9  r<  r>  r8  rL  rE  rT  rY  r^  ra  rz  r~  r  r  r  r  r  r  r  r  unsafe_chunkr  r  r  no_statsr  r  r  r  r  r  _fused_dropoutr  r  re  r1  rC  lift
lift_freshr  r  r  r  r  r  r  r  _adaptive_avg_pool2dr:  rC  rS  rW  r^  r_  r\  rz  r  r  r~  r  r  	Generatorr  r  r  r  r_  r  r  r  _upsample_nearest_exact1dr  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  rnn_tanhr   r  rnn_relur  r  r  r  r  r  r  r'  lstmr)  r*  r1  r2  grur3  r4  r7  r<  r>  r?  rD  rE  rC  rA  rF  rI  rN  rT  r@  rk  rl  r  r  r  rn  rt  ry  r{  r|  r  r  r  r  rM  r  r  r  r  r  r  r  r  r`  r  r  r6  upsample_bicubic2dr#  r$  reflection_pad1dreflection_pad2dreflection_pad3dr-  replication_pad1dreplication_pad2dreplication_pad3dr/  r+  reflection_pad1d_backwardreflection_pad2d_backwardreflection_pad3d_backwardrL  rN  rP  ri  r  r  rT  rB  rV  rY  ra  rm  +_scaled_dot_product_flash_attention_for_cpurw  r|  r  r  r  r   r  r  rQ   r  r  r  r  r  r  r  r  addbmm_addbmmaddmm_addmv_baddbmm_fill_gelu_r\  
hardswish_	hardtanh_hardtanhhardsigmoid___iand____and____ilshift__
__lshift__ri  rj  index_reduce_index_reduce__ior____or____irshift__
__rshift____ixor____xor__leaky_relu_
leaky_relulogit_logitrelu_r]  renorm_renormround_rf  scatter_r  scatter_add_scatter_addscatter_reduce_scatter_reducesilu_r)   r)   r)   r*   <module>   s  


* 
 

 
	




  *!	
6

'"
	P`
 
	
%


(


*
*00
	

W	

	
S



6J	
R		#

	

	



,
	

	

h	
%	$f-7
("$$





 




  		

.2
)


  @2					

	
I"

6


 (
.$$


* 
'


* 
y
S


0


0




W


,

<

	D	

"
	


