o
    iON                     @   sX   d dl Z d dl mZ ddlmZmZmZ dgZdddZ	dddZG dd deZ	dS )    N)Tensor   )
_to_scalar	OptimizerParamsTLBFGSc                 C   s   |d ur	|\}}n| |kr| |fn|| f\}}|| d||  | |   }	|	d ||  }
|
dkrj|
  }| |krN|||  || |	 || d|     }n| | | || |	 || d|     }tt|||S || d S )N      r   g       @)sqrtminmax)x1f1g1x2f2g2bounds
xmin_bound
xmax_boundd1	d2_squared2min_pos r   S/sda-disk/www/egybert/egybert_env/lib/python3.10/site-packages/torch/optim/lbfgs.py_cubic_interpolate   s   
	*(r   -C6??&.>   c           !   	   C   s   |   }|jtjd}| |||\}}d}||}d|||f\}}}}d}d}||
k r|||| |  ks@|dkrV||krV||g}||g}||jtjdg}||g}npt || | krk|g}|g}|g}d}n[|dkr||g}||g}||jtjdg}||g}nA|d||   }|d }|}t||||||||fd}|}|}|jtjd}|}| |||\}}|d7 }||}|d7 }||
k s.||
krd|g}||g}||g}d}|d |d	 krd
nd\}}|s||
k rt |d |d  | |	k rn t|d |d |d |d |d |d }dt|t|  } tt|| |t| | k rb|s>|t|ks>|t|kr_t |t| t |t| k rVt||  }nt||  }d}nd}nd}| |||\}}|d7 }||}|d7 }|||| |  ks||| kr|||< |||< |jtjd||< |||< |d |d krd
nd\}}nGt || | krd}n%||| ||   dkr|| ||< || ||< || ||< || ||< |||< |||< |jtjd||< |||< |s||
k s|| }|| }|| }||||fS )Nmemory_formatr   r   FTg{Gz?
   )r   )r   r   )r   r   g?)absr   clonetorchcontiguous_formatdotr   r   )!obj_funcxtdfggtdc1c2tolerance_changemax_lsd_normf_newg_newls_func_evalsgtd_newt_prevf_prevg_prevgtd_prevdonels_iterbracket	bracket_f	bracket_gbracket_gtdmin_stepmax_steptmpinsuf_progresslow_poshigh_posepsr   r   r   _strong_wolfe(   s   
$
* ""
$ arK   c                       s   e Zd ZdZ							d!ded	eeB d
ededB dededededB ddf fddZ	dd Z
dd Zd"ddZdd Zd"ddZdd Ze dd  Z  ZS )#r   a  Implements L-BFGS algorithm.

    Heavily inspired by `minFunc
    <https://www.cs.ubc.ca/~schmidtm/Software/minFunc.html>`_.

    .. warning::
        This optimizer doesn't support per-parameter options and parameter
        groups (there can be only one).

    .. warning::
        Right now all parameters have to be on a single device. This will be
        improved in the future.

    .. note::
        This is a very memory intensive optimizer (it requires additional
        ``param_bytes * (history_size + 1)`` bytes). If it doesn't fit in memory
        try reducing the history size, or use a different algorithm.

    Args:
        params (iterable): iterable of parameters to optimize. Parameters must be real.
        lr (float, optional): learning rate (default: 1)
        max_iter (int, optional): maximal number of iterations per optimization step
            (default: 20)
        max_eval (int, optional): maximal number of function evaluations per optimization
            step (default: max_iter * 1.25).
        tolerance_grad (float, optional): termination tolerance on first order optimality
            (default: 1e-7).
        tolerance_change (float, optional): termination tolerance on function
            value/parameter changes (default: 1e-9).
        history_size (int, optional): update history size (default: 100).
        line_search_fn (str, optional): either 'strong_wolfe' or None (default: None).
    r      NHz>r   d   paramslrmax_itermax_evaltolerance_gradr3   history_sizeline_search_fnreturnc	           
         s   t |tr| dkrtdd|kstd| |d u r$|d d }|||||||d}	t ||	 t| jdkr@td| jd	 d
 | _d | _	d S )Nr   zTensor lr must be 1-elementg        zInvalid learning rate:       )rP   rQ   rR   rS   r3   rT   rU   z>LBFGS doesn't support per-parameter options (parameter groups)r   rO   )

isinstancer   numel
ValueErrorsuper__init__lenparam_groups_params_numel_cache)
selfrO   rP   rQ   rR   rS   r3   rT   rU   defaults	__class__r   r   r]      s*   	
zLBFGS.__init__c                 C   s&   | j d u rtdd | jD | _ | j S )Nc                 s   s.    | ]}t |rd |  n| V  qdS )r	   N)r'   
is_complexrZ   .0pr   r   r   	<genexpr>  s
    
zLBFGS._numel.<locals>.<genexpr>)ra   sumr`   rb   r   r   r   _numel  s
   

zLBFGS._numelc                 C   s   g }| j D ]6}|jd u r||  }n|jjr#|j d}n|jd}t	|r6t
|d}|| qt|dS )Nr$   r   )r`   gradnewrZ   zero_	is_sparseto_denseviewr'   rf   view_as_realappendcat)rb   viewsri   rs   r   r   r   _gather_flat_grad$  s   


zLBFGS._gather_flat_gradc                 C   s|   d}| j D ]$}t|rt|}| }|j||||  ||d ||7 }q||  kr<td| d|   d S )Nr   alphazExpected offset z
 to equal )	r`   r'   rf   rt   rZ   add_view_asrm   AssertionError)rb   	step_sizeupdateoffsetri   rZ   r   r   r   	_add_grad2  s   


 
zLBFGS._add_gradc                 C   s   dd | j D S )Nc                 S   s   g | ]	}|j tjd qS )r!   )r&   r'   r(   rg   r   r   r   
<listcomp>?  s    z&LBFGS._clone_param.<locals>.<listcomp>)r`   rl   r   r   r   _clone_param>     zLBFGS._clone_paramc                 C   s(   t | j|ddD ]	\}}|| qd S )NT)strict)zipr`   copy_)rb   params_datari   pdatar   r   r   
_set_paramA  s   zLBFGS._set_paramc                 C   s0   |  || t| }|  }| | ||fS N)r   floatrx   r   )rb   closurer+   r,   r-   loss	flat_gradr   r   r   _directional_evaluateE  s
   

zLBFGS._directional_evaluatec           &   
      s  t jdkrtdt j t   jd }t|d }|d }|d }|d }|d }|d	 }|d
 }	jjd  }
|
dd |
dd   }t	|}d}|
d  d7  < 
 }|  |k}|ro|S |
d}|
d}|
d}|
d}|
d}|
d}|
d}|
d}d}||k rA|d7 }|
d  d7  < |
d dkr| }g }g }g }d}n||}||}||}|dkrt ||	kr|d |d |d || || |d|  ||| }t |}d|
vrdg|	 |
d< |
d }| }t|d ddD ]}|| |||  ||< |j|| ||  d qt|| }} t|D ]}|| | ||  }!| j|| || |! d qF|du ro|jtjd}n|| |}|
d dkrtdd|   | }n|}||}"|"| krnd}#|dur|dkrtd }$ fdd}%t|%|$|||||"|| d \}}}}#|| |  |k}n5|| ||kr
t    }W d   n	1 sw   Y  t	|}
 }|  |k}d}#||#7 }|
d  |#7  < ||krn%||kr"n|r&n||  |kr3nt|| |k r=n||k s||
d< ||
d< ||
d< ||
d< ||
d< ||
d< ||
d< ||
d< |S )!zPerform a single optimization step.

        Args:
            closure (Callable): A closure that reevaluates the model
                and returns the loss.
        r   z*Expected exactly one param_group, but got r   rP   rQ   rR   rS   r3   rU   rT   
func_evalsn_iterr-   r,   old_dirsold_stpsroH_diagprev_flat_grad	prev_lossg|=g      ?alNr$   ry   r!   strong_wolfez only 'strong_wolfe' is supportedc                    s     | ||S r   )r   )r+   r,   r-   r   rb   r   r   r*     r   zLBFGS.step.<locals>.obj_func)r4   )r^   r_   r}   r'   enable_gradr   stater`   
setdefaultr   rx   r%   r   getnegsubmulr)   popru   ranger{   r&   r(   r   r   rk   RuntimeErrorr   rK   r   )&rb   r   grouprP   rQ   rR   rS   r3   rU   rT   r   	orig_lossr   current_evalsr   opt_condr-   r,   r   r   r   r   r   r   r   ysysnum_oldr   qirbe_ir0   r8   x_initr*   r   r   r   stepL  s   




























  z
LBFGS.step)r   rL   NrM   r   rN   N)rV   N)__name__
__module____qualname____doc__r   r   r   intstrr]   rm   rx   r   r   r   r   r'   no_gradr   __classcell__r   r   rd   r   r      sH    $	
$


r   )r   r   r   r    )
r'   r   	optimizerr   r   r   __all__r   rK   r   r   r   r   r   <module>   s   

 -