o
    i:1                     @  s   d Z ddlmZ ddlZddlmZ ddlmZ ddlm	Z	 ddl
mZ dd	lmZmZ er:dd
lmZ ddlmZ i ZeeddG dd dZG dd dZedddZd1ddZd2d#d$Zd3d4d)d*Ze Zd5d+d,Zd3d-d.d/d0ZdS )6z
Contains the logic for automatic additional output capture with our forward decorators.
This mostly describe the hooks used and the logic to make capture thread/context safe.
    )annotationsN)
ContextVar)	dataclasswraps)TYPE_CHECKING   )is_torchdynamo_compilingrequires)nn   PreTrainedModel)torch)backendsc                   @  s>   e Zd ZU dZded< dZded< dZded	< dZded
< dS )OutputRecordera  
    Configuration for recording outputs from a model via hooks.

    Attributes:
        target_class (Type): The class (e.g., nn.Module) to which the hook will be attached.
        index (Optional[int]): If the output is a tuple/list, optionally record only at a specific index.
        layer_name (Optional[str]): Name of the submodule to target (if needed), e.g., "transformer.layer.3.attn".
        class_name (Optional[str]): Name of the class to which the hook will be attached. Could be the suffix of class name in some cases.
    ztype[nn.Module]target_classr   intindexN
str | None
layer_name
class_name)__name__
__module____qualname____doc____annotations__r   r   r    r   r   e/sda-disk/www/egybert/egybert_env/lib/python3.10/site-packages/transformers/utils/output_capturing.pyr   '   s   
 
r   c                   @  s0   e Zd ZdZdd Zdd Zdd Zdd	 Zd
S )CompileableContextVara  
    Convenience wrapper around a ContextVar for usage with `torch.compile`.
    This behaves exactly as a `ContextVar`, except when compilation is triggered in which case it behaves as a simple
    global variable. This is useful as `torch.compile` cannot trace the `get` method of `ContextVar`. This however means
    that the access to the underlying variable is not thread-safe when compilation is triggered.
    c                 C  s   t ||d| _|| _d| _d S )NdefaultF)r   context_var
global_var	compiling)selfnamer!   r   r   r   __init__B   s   
zCompileableContextVar.__init__c                 C  s(   | j r| jS t rd| _| jS | j S NT)r$   r#   r	   is_compilingr"   get)r%   r   r   r   r*   G   s   
zCompileableContextVar.getc                 C  s"   t  r|| _d| _d S | j|S r(   )r	   r#   r$   r"   set)r%   valuer   r   r   r+   S   s
   zCompileableContextVar.setc                 C  s&   | j rd | _d| _ d S | j| d S )NF)r$   r#   r"   reset)r%   tokenr   r   r   r-   [   s   
zCompileableContextVar.resetN)r   r   r   r   r'   r*   r+   r-   r   r   r   r   r   :   s    r   output_collectorr    module	nn.Modulekeystrr   r   returnNonec                   s    fdd}|  | dS )zaInstall the forward hook needed to capture the output described by `key` and `index` in `module`.c                   s   t  }|d u s| vrd S dkr%t| dkr%| |d  t|ts3| | d S |  d urD| |   d S d S )Nhidden_statesr   )_active_collectorr*   keyslenappend
isinstancetuple)r0   argsoutputcollected_outputsr   r2   r   r   output_capturing_hookj   s   
z;install_output_capuring_hook.<locals>.output_capturing_hookN)register_forward_hook)r0   r2   r   rA   r   r@   r   install_output_capuring_hookg   s   rC   parent_modulemodule_namecapture_tasks list[tuple[str, OutputRecorder]]c                 C  s   ddl m} |  D ] \}}t||st|| d| | q
t|| d| d q
|D ],\}}|jdur<t| |jsG|jdurY||jrY|j	durR|j	|vrRq-t
| ||j q-dS )a  
    Recursively install all output capturing hooks on all submodules of `parent_module`.
    Note that we need to use this recursive approach instead of simply iterating over all modules, because we want
    to respect the `capture_tasks` of all individual submodels (`PreTrainedModel` instances) in the graph. That is, once
    we reach a submodel in the graph, its children should use this submodel's `capture_tasks`, but other parts of the graph
    should not.
    r   r   .)prefixN)modeling_utilsr   named_childrenr;   recursively_install_hooks"install_all_output_capturing_hooksr   r   endswithr   rC   r   )rD   rE   rF   r   r&   r0   r2   specsr   r   r   rL   {   s   

rL   modelr   rI   r   c           
      C  s   t t| jp	i }g }| D ]>\}}t|ts|g}|D ]/}t|tsFd|v r+dnd}t|ts4dn|}t|ts=|nd}	t|	||d}|||f qq|durU|nd}t	| || d| _
dS )z
    Install the output recording hooks on all the modules in `model`. Tis will take care of correctly dispatching
    the `_can_record_outputs` property of each individual submodels in case of composite models.
    r6   r   r   N)r   r   r    T)_CAN_RECORD_REGISTRYr*   r3   	__class__itemsr;   listr   r:   rL   !_output_capturing_hooks_installed)
rP   rI   capture_flagsrF   r2   layer_specsrO   r   r   r   r   r   r   rM      s    

	
rM   c                 C  s`   t | ddrdS t t | ddr	 W d   dS t|  W d   dS 1 s)w   Y  dS )z
    Check if the model already has output capturing hooks installed, and install them if it is not already the
    case.
    Note that this is thread-safe, in case 2 (or more) threads want to install them concurrently.
    rV   FN)getattr_hook_installation_lockrM   )rP   r   r   r   maybe_install_capturing_hooks   s   
"r[   Ttie_last_hidden_statesc                  s     fdd}| dur|| S |S )a  
    Decorator to intercept specific layer outputs through hooks. The hooks are installed only once and lazily,
    the first time output capture is requested with the `output_xxx` kwargs/config.
    The implementation is fully context/thread safe, except when using `torch.compile`, as dynamo is unable to trace
    through `ContextVar` methods.

    Args:
        tie_last_hidden_states (`bool`, *optional*, defaults to `True`):
            Whether to overwrite `out.hidden_states[-1]` with the `out.last_hidden_state`.
            This is true for all language models and should be toggled off only if
            `out.hidden_states[-1]` has to be the hidden state before last layer norm, which
            is needed for some vision models (e.g. CLIP, SigLIP)
    c                   s   t   fdd}|S )Nc           
   	     s    dtjdd}ttjpi } fdd|D }d|v r0 dtjdd|d< d	|v rA dtjdd|d
< dd | D }t|dkrTt	 t
|}zg|R i  }W t
| nt
| w |D ]}	|	dkrs}n/t|dr||	 d d ||	< ||	 |j nt|dr||	 d d ||	< ||	 |j t||	 ||	< qt|	dkrt||	 trt||	 dkrt||	 dd d ||	< t||	 dd d |d|	 < qtt||	 ||	< qtt||	 ||	< qt|du r| }|S )Nreturn_dictTc              
     s6   i | ]}d |   d | tjd | dqS )output_F)r*   rY   config).0kkwargsr%   r   r   
<dictcomp>   s    (zHcapture_outputs.<locals>.wrapped_fn.<locals>.wrapper.<locals>.<dictcomp>cross_attentionsoutput_attentionsFoutput_cross_attentionsmask_decoder_attentionsoutput_mask_decoder_attentionsc                 S  s"   i | ]\}}|r| d dg qS )r_   rQ   )replace)ra   rb   vr   r   r   re      s   " r   r6   vision_hidden_stateslast_hidden_state
attentionsr   r   cross_)poprY   r`   rR   r*   r3   rS   rT   r9   r[   r7   r+   r-   hasattrr:   rm   ro   r<   r;   rU   to_tuple)
r%   r=   rd   r^   capturable_flagsrecordable_keysr?   output_tokenoutputsr2   )funcr]   rc   r   wrapper   sP   


 z4capture_outputs.<locals>.wrapped_fn.<locals>.wrapperr   )ry   rz   r\   )ry   r   
wrapped_fn   s   ?z#capture_outputs.<locals>.wrapped_fnNr   )ry   r]   r{   r   r\   r   capture_outputs   s   Cr|   )r0   r1   r2   r3   r   r   r4   r5   )rD   r1   rE   r3   rF   rG   r4   r5   )N)rP   r   rI   r   r4   r5   )rP   r   r4   r5   )r   
__future__r   	threadingcontextvarsr   dataclassesr   	functoolsr   typingr   import_utilsr	   r
   r   r   rJ   r   rR   r   r   r7   rC   rL   rM   LockrZ   r[   r|   r   r   r   r   <module>   s.   *

 
