o
    0i                     @   s   d dl Z d dlZddlmZmZ ddlmZmZmZm	Z	 e	 r.d dl
m  mZ d dlmZ dd ZG dd dejjZd	efd
dZdS )    N   )AcceleratorStateGradientState)DistributedType
honor_typeis_lomo_availableis_torch_xla_availablec                    sh   t | ttfrt|  fdd| D S t | tr't|  fdd|  D S t | tjr2| 	 S | S )Nc                 3   s    | ]}t | V  qd S Nmove_to_device).0tdevice V/sda-disk/www/egybert/egybert_env/lib/python3.10/site-packages/accelerate/optimizer.py	<genexpr>   s    z!move_to_device.<locals>.<genexpr>c                    s   i | ]
\}}|t | qS r   r
   r   kvr   r   r   
<dictcomp>    s    z"move_to_device.<locals>.<dictcomp>)

isinstancelisttupler   dicttypeitemstorchTensorto)stater   r   r   r   r      s   

r   c                   @   s   e Zd ZdZd%ddZedd Zejdd Zed	d
 Zejdd
 Zedd Z	e	jdd Z	dd Z
dd Zdd Zd&ddZdd Zdd Zd&ddZdd Zedd  Zd!d" Zd#d$ ZdS )'AcceleratedOptimizera  
    Internal wrapper around a torch optimizer.

    Conditionally will perform `step` and `zero_grad` if gradients should be synchronized when performing gradient
    accumulation.

    Args:
        optimizer (`torch.optim.optimizer.Optimizer`):
            The optimizer to wrap.
        device_placement (`bool`, *optional*, defaults to `True`):
            Whether or not the optimizer should handle device placement. If so, it will place the state dictionary of
            `optimizer` on the right device.
        scaler (`torch.amp.GradScaler` or `torch.cuda.amp.GradScaler`, *optional*):
            The scaler to use in the step function if training with mixed precision.
    TNc                 C   s   || _ || _t | _t | _|| _d| _| jd ur)d| _| j j	| _
t| | j j	| _|rO| j  }| jjtjkr@t|| jj nt|| jj}| j | d S d S NF)	optimizerscalerr   accelerator_stater   gradient_statedevice_placement_is_overflow_accelerate_step_calledstep_optimizer_original_step_methodpatch_optimizer_step_optimizer_patched_step_method
state_dictdistributed_typer   XLAxmsend_cpu_data_to_devicer   r   load_state_dict)selfr#   r'   r$   r.   r   r   r   __init__7   s"   


zAcceleratedOptimizer.__init__c                 C      | j jS r	   r#   r    r4   r   r   r   r    M      zAcceleratedOptimizer.statec                 C      || j _d S r	   r7   r4   r    r   r   r   r    Q      c                 C   r6   r	   r#   param_groupsr8   r   r   r   r>   U   r9   z!AcceleratedOptimizer.param_groupsc                 C   r:   r	   r=   )r4   r>   r   r   r   r>   Y   r<   c                 C   r6   r	   r#   defaultsr8   r   r   r   r@   ]   r9   zAcceleratedOptimizer.defaultsc                 C   r:   r	   r?   )r4   r@   r   r   r   r@   a   r<   c                 C   s   | j | d S r	   )r#   add_param_group)r4   param_groupr   r   r   rA   e   s   z$AcceleratedOptimizer.add_param_groupc                 C   s4   | j jtjkr| jrt|| j j | j	| d S r	   )
r%   r/   r   r0   r'   r1   r2   r   r#   r3   )r4   r.   r   r   r   r3   h   s   z$AcceleratedOptimizer.load_state_dictc                 C   s
   | j  S r	   )r#   r.   r8   r   r   r   r.   m   s   
zAcceleratedOptimizer.state_dictc                 C   s`   | j jr.dt| jjjv }|r|d u rd}| jj|d d S |d ur'td| j  d S d S )Nset_to_noneT)rC   zJ`set_to_none` for Optimizer.zero_grad` is not supported by this optimizer.)r&   sync_gradientsinspect	signaturer#   	zero_grad
parameters
ValueError)r4   rC   
accept_argr   r   r   rG   p   s   zAcceleratedOptimizer.zero_gradc                 C   sj   t | jdrt| jjr| j  dS t | jdr/t | jjdr1t| jjjr3| jj  dS dS dS dS )z`
        Sets the optimizer to "train" mode. Useful for optimizers like `schedule_free`
        trainr#   N)hasattrr#   callablerK   r8   r   r   r   rK   |   s   
zAcceleratedOptimizer.trainc                 C   s.   t | jdrt| jjr| j  dS dS dS )z_
        Sets the optimizer to "eval" mode. Useful for optimizers like `schedule_free`
        evalN)rL   r#   rM   rN   r8   r   r   r   rN      s   zAcceleratedOptimizer.evalc                 C   s   t  rddlm}m} | jjs,| jjtj	kr,t
| j}t
jd|dt  d d| j_t  r9t| j||fr9d S | jjrm| jd urg| j| j_| j| j| | j  | js[d| _nd| _| j| j_d| _n| j| | jjtj	krzd| j_d S d S )Nr   )AdaLomoLomosumg      ?)scaleTF)r   
lomo_optimrO   rP   r&   is_xla_gradients_syncedr%   r/   r   r0   r1   _fetch_gradientsr#   
all_reducexr
world_sizer   rD   r$   r-   r*   updater)   r(   r+   )r4   closurerO   rP   	gradientsr   r   r   r*      s2   



zAcceleratedOptimizer.stepc                    s,   | j jD ]} fdd|d D |d< qd S )Nc                    s   g | ]}  ||qS r   )get)r   pparameters_mapr   r   
<listcomp>   s    z;AcceleratedOptimizer._switch_parameters.<locals>.<listcomp>paramsr=   )r4   r_   rB   r   r^   r   _switch_parameters   s   z'AcceleratedOptimizer._switch_parametersc                 C   s   | j S )z.Whether or not the optimizer step was skipped.)r(   r8   r   r   r   step_was_skipped   s   z%AcceleratedOptimizer.step_was_skippedc                    s    g d  fdd| j  D S )N)r)   r+   r-   c                    s   i | ]\}}| vr||qS r   r   r   _ignored_keysr   r   r      s    z5AcceleratedOptimizer.__getstate__.<locals>.<dictcomp>)__dict__r   r8   r   rd   r   __getstate__   s   z!AcceleratedOptimizer.__getstate__c                 C   s>   | j | | jd urd| _| jj| _t| | jj| _d S d S r"   )	rf   rY   r$   r)   r#   r*   r+   r,   r-   r;   r   r   r   __setstate__   s   

z!AcceleratedOptimizer.__setstate__)TNr	   )__name__
__module____qualname____doc__r5   propertyr    setterr>   r@   rA   r3   r.   rG   rK   rN   r*   rb   rc   rg   rh   r   r   r   r   r!   &   s6    








&
r!   accelerated_optimizerc                    s    fdd}|S )Nc                     s   d _ | i |S )NT)r)   )argskwargsro   methodr   r   patched_step   s   z*patch_optimizer_step.<locals>.patched_stepr   )ro   rs   rt   r   rr   r   r,      s   r,   )rE   r   r    r   r   utilsr   r   r   r   torch_xla.core.xla_modelcore	xla_modelr1   torch_xla.runtimeruntimerW   r   optim	Optimizerr!   r,   r   r   r   r   <module>   s   
 +