"""Implementation for the Resilient backpropagation."""
from typing import cast

import torch
from torch import Tensor

from .optimizer import (
    _capturable_doc,
    _default_to_fused_or_foreach,
    _differentiable_doc,
    _disable_dynamo_if_unsupported,
    _foreach_doc,
    _get_capturable_supported_devices,
    _get_scalar_dtype,
    _maximize_doc,
    _params_doc,
    _to_scalar,
    _use_grad_for_differentiable,
    _view_as_real,
    Optimizer,
    ParamsT,
)

__all__ = ["Rprop", "rprop"]


class Rprop(Optimizer):
    def __init__(
        self,
        params: ParamsT,
        lr: float | Tensor = 1e-2,
        etas: tuple[float, float] = (0.5, 1.2),
        step_sizes: tuple[float, float] = (1e-6, 50),
        *,
        capturable: bool = False,
        foreach: bool | None = None,
        maximize: bool = False,
        differentiable: bool = False,
    ) -> None:
        if isinstance(lr, Tensor) and lr.numel() != 1:
            raise ValueError("Tensor lr must be 1-element")
        if not 0.0 <= lr:
            raise ValueError(f"Invalid learning rate: {lr}")
        if not 0.0 < etas[0] < 1.0 < etas[1]:
            raise ValueError(f"Invalid eta values: {etas[0]}, {etas[1]}")

        defaults = dict(
            lr=lr,
            etas=etas,
            step_sizes=step_sizes,
            foreach=foreach,
            maximize=maximize,
            differentiable=differentiable,
            capturable=capturable,
        )
        super().__init__(params, defaults)

    def __setstate__(self, state):
        super().__setstate__(state)
        for group in self.param_groups:
            group.setdefault("foreach", None)
            group.setdefault("maximize", False)
            group.setdefault("differentiable", False)
            group.setdefault("capturable", False)
            for p in group["params"]:
                p_state = self.state.get(p, [])
                if len(p_state) != 0 and not torch.is_tensor(p_state["step"]):
                    step_val = float(p_state["step"])
                    p_state["step"] = (
                        torch.tensor(
                            step_val, dtype=_get_scalar_dtype(), device=p.device
                        )
                        if group["capturable"]
                        else torch.tensor(step_val, dtype=_get_scalar_dtype())
                    )

    def _init_group(self, group, params, grads, prevs, step_sizes, state_steps):
        has_complex = False
        for p in group["params"]:
            if p.grad is None:
                continue
            has_complex |= torch.is_complex(p)
            params.append(p)
            grad = p.grad
            if grad.is_sparse:
                raise RuntimeError("Rprop does not support sparse gradients")

            grads.append(grad)
            state = self.state[p]

            # State initialization
            if len(state) == 0:
                state["step"] = (
                    torch.zeros((), dtype=_get_scalar_dtype(), device=p.device)
                    if group["capturable"]
                    else torch.zeros((), dtype=_get_scalar_dtype())
                )
                state["prev"] = torch.zeros_like(
                    p, memory_format=torch.preserve_format
                )
                if p.dtype.is_complex:
                    # Complex numbers are treated as two independent real numbers,
                    # so the step size must not be zero for the imaginary part.
                    state["step_size"] = torch.full_like(
                        grad, complex(group["lr"], group["lr"])
                    )
                else:
                    state["step_size"] = torch.full_like(grad, group["lr"])

            prevs.append(state["prev"])
            step_sizes.append(state["step_size"])
            state_steps.append(state["step"])

        return has_complex

    @_use_grad_for_differentiable
    def step(self, closure=None):
        """Perform a single optimization step.

        Args:
            closure (Callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        self._cuda_graph_capture_health_check()

        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            params: list[Tensor] = []
            grads: list[Tensor] = []
            prevs: list[Tensor] = []
            step_sizes: list[Tensor] = []
            state_steps: list[Tensor] = []

            etaminus, etaplus = group["etas"]
            step_size_min, step_size_max = group["step_sizes"]
            foreach = group["foreach"]
            maximize = group["maximize"]

            has_complex = self._init_group(
                group, params, grads, prevs, step_sizes, state_steps
            )

            rprop(
                params,
                grads,
                prevs,
                step_sizes,
                state_steps,
                step_size_min=step_size_min,
                step_size_max=step_size_max,
                etaminus=etaminus,
                etaplus=etaplus,
                foreach=foreach,
                maximize=maximize,
                differentiable=group["differentiable"],
                capturable=group["capturable"],
                has_complex=has_complex,
            )

        return loss


Rprop.__doc__ = (
    r"""Implements the resilient backpropagation algorithm.

    .. math::
       \begin{aligned}
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{input}      : \theta_0 \in \mathbf{R}^d \text{ (params)},f(\theta)
                \text{ (objective)},                                                             \\
            &\hspace{13mm}      \eta_{+/-} \text{ (etaplus, etaminus)}, \Gamma_{max/min}
                \text{ (step sizes)}                                                             \\
            &\textbf{initialize} :   g^0_{prev} \leftarrow 0,
                \: \eta_0 \leftarrow \text{lr (learning rate)}                                   \\
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{for} \: t=1 \: \textbf{to} \: \ldots \: \textbf{do}                         \\
            &\hspace{5mm}g_t           \leftarrow   \nabla_{\theta} f_t (\theta_{t-1})           \\
            &\hspace{5mm} \textbf{for} \text{  } i = 0, 1, \ldots, d-1 \: \mathbf{do}            \\
            &\hspace{10mm}  \textbf{if} \:   g^i_{prev} g^i_t  > 0                               \\
            &\hspace{15mm}  \eta^i_t \leftarrow \mathrm{min}(\eta^i_{t-1} \eta_{+},
                \Gamma_{max})                                                                    \\
            &\hspace{10mm}  \textbf{else if}  \:  g^i_{prev} g^i_t < 0                           \\
            &\hspace{15mm}  \eta^i_t \leftarrow \mathrm{max}(\eta^i_{t-1} \eta_{-},
                \Gamma_{min})                                                                    \\
            &\hspace{15mm}  g^i_t \leftarrow 0                                                   \\
            &\hspace{10mm}  \textbf{else}  \:                                                    \\
            &\hspace{15mm}  \eta^i_t \leftarrow \eta^i_{t-1}                                     \\
            &\hspace{5mm}\theta_t \leftarrow \theta_{t-1}- \eta_t \mathrm{sign}(g_t)             \\
            &\hspace{5mm}g_{prev} \leftarrow  g_t                                                \\
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
            &\bf{return} \:  \theta_t                                                     \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
       \end{aligned}

    For further details regarding the algorithm we refer to the paper
    `A Direct Adaptive Method for Faster Backpropagation Learning: The RPROP Algorithm
    <http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.21.1417>`_.
    """
    + rf"""
    Args:
        {_params_doc}
        lr (float, optional): learning rate (default: 1e-2)
        etas (Tuple[float, float], optional): pair of (etaminus, etaplus), that
            are multiplicative increase and decrease factors
            (default: (0.5, 1.2))
        step_sizes (Tuple[float, float], optional): a pair of minimal and
            maximal allowed step sizes (default: (1e-6, 50))
        {_foreach_doc}
        {_maximize_doc}
        {_capturable_doc}
        {_differentiable_doc}

    """
)


def _single_tensor_rprop(
    params: list[Tensor],
    grads: list[Tensor],
    prevs: list[Tensor],
    step_sizes: list[Tensor],
    state_steps: list[Tensor],
    *,
    step_size_min: float,
    step_size_max: float,
    etaminus: float,
    etaplus: float,
    maximize: bool,
    capturable: bool,
    differentiable: bool,
    has_complex: bool,
) -> None:
    for i, param in enumerate(params):
        grad = grads[i]
        grad = grad if not maximize else -grad
        prev = prevs[i]
        step_size = step_sizes[i]
        step = state_steps[i]

        # If compiling, the compiler will handle cudagraph checks.
        if not torch.compiler.is_compiling() and capturable:
            capturable_supported_devices = _get_capturable_supported_devices()
            assert (
                param.device.type == step.device.type
                and param.device.type in capturable_supported_devices
            ), (
                "If capturable=True, params and state_steps must be on supported "
                f"devices: {capturable_supported_devices}."
            )

        step += 1

        if torch.is_complex(param):
            grad = torch.view_as_real(grad)
            prev = torch.view_as_real(prev)
            param = torch.view_as_real(param)
            step_size = torch.view_as_real(step_size)

        if differentiable:
            sign = grad.mul(prev.clone()).sign()
        else:
            sign = grad.mul(prev).sign()

        if capturable:
            sign.copy_(torch.where(sign.gt(0), etaplus, sign))
            sign.copy_(torch.where(sign.lt(0), etaminus, sign))
            sign.copy_(torch.where(sign.eq(0), 1, sign))
        else:
            sign[sign.gt(0)] = etaplus
            sign[sign.lt(0)] = etaminus
            sign[sign.eq(0)] = 1

        # update step sizes with step size updates
        step_size.mul_(sign).clamp_(step_size_min, step_size_max)

        # for dir < 0, dfdx = 0; for dir >= 0, dfdx = dfdx
        grad = grad.clone(memory_format=torch.preserve_format)
        if capturable:
            grad.copy_(torch.where(sign.eq(etaminus), 0, grad))
        else:
            grad[sign.eq(etaminus)] = 0

        # update parameters
        param.addcmul_(grad.sign(), step_size, value=-1)
        prev.copy_(grad)
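# Worked example of the elementwise rule above (illustrative numbers, not from
# the source): with the default etas=(0.5, 1.2), if a coordinate had
# prev = +0.3 and the new grad is +0.1, their product is positive, so that
# coordinate's step size is multiplied by 1.2 (clamped to step_size_max) and
# the parameter is decremented by step_size * sign(grad). Had the new grad
# been -0.1, the product would be negative: the step size shrinks by 0.5
# (clamped to step_size_min) and the grad is zeroed, so that coordinate skips
# this update entirely.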
def _multi_tensor_rprop(
    params: list[Tensor],
    grads: list[Tensor],
    prevs: list[Tensor],
    step_sizes: list[Tensor],
    state_steps: list[Tensor],
    *,
    step_size_min: float,
    step_size_max: float,
    etaminus: float,
    etaplus: float,
    maximize: bool,
    capturable: bool,
    differentiable: bool,
    has_complex: bool,
) -> None:
    if len(params) == 0:
        return

    assert not differentiable, "_foreach ops don't support autograd"

    # If compiling, the compiler will handle cudagraph checks.
    if not torch.compiler.is_compiling() and capturable:
        capturable_supported_devices = _get_capturable_supported_devices()
        assert all(
            p.device.type == step.device.type
            and p.device.type in capturable_supported_devices
            for p, step in zip(params, state_steps, strict=True)
        ), (
            "If capturable=True, params and state_steps must be on supported "
            f"devices: {capturable_supported_devices}."
        )

    grouped_tensors = Optimizer._group_tensors_by_device_and_dtype(
        [params, grads, prevs, step_sizes, state_steps]  # type: ignore[list-item]
    )
    for (
        grouped_params_,
        grouped_grads_,
        grouped_prevs_,
        grouped_step_sizes_,
        grouped_state_steps_,
    ), _ in grouped_tensors.values():
        grouped_params = cast(list[Tensor], grouped_params_)
        grouped_grads = cast(list[Tensor], grouped_grads_)
        grouped_prevs = cast(list[Tensor], grouped_prevs_)
        grouped_step_sizes = cast(list[Tensor], grouped_step_sizes_)
        grouped_state_steps = cast(list[Tensor], grouped_state_steps_)

        # Update steps. If steps are on CPU, foreach falls back to a slow
        # for-loop calling t.add(1) repeatedly, wrapping 1 into a Tensor each
        # time; wrapping it once here is faster. alpha is required to assure
        # we go to the fused version.
        if not torch.compiler.is_compiling() and grouped_state_steps[0].is_cpu:
            torch._foreach_add_(
                grouped_state_steps, torch.tensor(1.0, device="cpu"), alpha=1.0
            )
        else:
            torch._foreach_add_(grouped_state_steps, 1)

        # Handle complex params as independent real tensors
        if has_complex:
            _view_as_real(
                grouped_params, grouped_grads, grouped_prevs, grouped_step_sizes
            )

        signs = torch._foreach_mul(grouped_grads, grouped_prevs)
        if maximize:
            torch._foreach_neg_(signs)

        # Reuse grouped_prevs' memory to hold this iteration's (possibly
        # negated) grads, so no extra copy of the gradients is materialized.
        torch._foreach_copy_(grouped_prevs, grouped_grads)
        if maximize:
            torch._foreach_neg_(grouped_prevs)

        torch._foreach_sign_(signs)
        if capturable:
            for sign in signs:
                sign.copy_(torch.where(sign.gt(0), etaplus, sign))
                sign.copy_(torch.where(sign.lt(0), etaminus, sign))
                sign.copy_(torch.where(sign.eq(0), 1, sign))
        else:
            for sign in signs:
                sign[sign.gt(0)] = etaplus
                sign[sign.lt(0)] = etaminus
                sign[sign.eq(0)] = 1

        # update step sizes with step size updates
        torch._foreach_mul_(grouped_step_sizes, signs)
        for step_size in grouped_step_sizes:
            step_size.clamp_(step_size_min, step_size_max)

        # for dir < 0, dfdx = 0; for dir >= 0, dfdx = dfdx
        grouped_grads = list(grouped_prevs)
        for i in range(len(grouped_grads)):
            grouped_grads[i].copy_(
                torch.where(signs[i].eq(etaminus), 0, grouped_grads[i])
            )

        # signs is not used after this point; free it to save memory
        del signs

        # update parameters
        grad_signs = [grad.sign() for grad in grouped_grads]
        torch._foreach_addcmul_(
            grouped_params, grad_signs, grouped_step_sizes, value=-1
        )

        # grouped_prevs already holds this iteration's grads, since its memory
        # was reused above, so no final copy back is needed.
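# A minimal sketch of opting into the multi-tensor path above (illustrative;
# when `foreach` is left as None, the choice is made automatically by
# _default_to_fused_or_foreach):
#
#     opt = torch.optim.Rprop(model.parameters(), foreach=True)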
@_disable_dynamo_if_unsupported(single_tensor_fn=_single_tensor_rprop)
def rprop(
    params: list[Tensor],
    grads: list[Tensor],
    prevs: list[Tensor],
    step_sizes: list[Tensor],
    state_steps: list[Tensor],
    # kwonly args with defaults are not supported by functions compiled with
    # torchscript; setting these as kwargs with defaults for now, as the
    # functional API is compiled by torch/distributed/optim
    foreach: bool | None = None,
    capturable: bool = False,
    maximize: bool = False,
    differentiable: bool = False,
    has_complex: bool = False,
    *,
    step_size_min: float,
    step_size_max: float,
    etaminus: float,
    etaplus: float,
) -> None:
    r"""Functional API that performs rprop algorithm computation.

    See :class:`~torch.optim.Rprop` for details.
    """
    if not torch.compiler.is_compiling() and not all(
        isinstance(t, torch.Tensor) for t in state_steps
    ):
        raise RuntimeError(
            "API has changed, `state_steps` argument must contain a list of singleton tensors"
        )

    if foreach is None:
        _, foreach = _default_to_fused_or_foreach(
            params, differentiable, use_fused=False
        )

    if foreach and torch.jit.is_scripting():
        raise RuntimeError("torch.jit.script not supported with foreach optimizers")

    if foreach and not torch.jit.is_scripting():
        func = _multi_tensor_rprop
    else:
        func = _single_tensor_rprop

    func(
        params,
        grads,
        prevs,
        step_sizes,
        state_steps,
        step_size_min=step_size_min,
        step_size_max=step_size_max,
        etaminus=etaminus,
        etaplus=etaplus,
        capturable=capturable,
        maximize=maximize,
        differentiable=differentiable,
        has_complex=has_complex,
    )
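# Usage sketch (illustrative, not part of the original module; `model` and
# `inputs` are placeholders). Rprop adapts each coordinate's step size based
# on sign changes between successive gradients, so it is intended for
# full-batch rather than minibatch training:
#
#     import torch
#     model = torch.nn.Linear(10, 1)
#     opt = torch.optim.Rprop(
#         model.parameters(), lr=0.01, etas=(0.5, 1.2), step_sizes=(1e-6, 50)
#     )
#     for _ in range(100):
#         opt.zero_grad()
#         loss = model(inputs).pow(2).sum()  # inputs: the full training batch
#         loss.backward()
#         opt.step()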