o
    0if                     @   s   d Z ddlZddlZddlZddlZddlZddlmZ ddlZddl	m
Z
 ddlmZmZmZmZmZmZmZmZmZ ddlmZ dd	d
Zdd ZdedefddZ			ddee dedee fddZdefddZdS )z
A collection of utilities for ensuring that training can always occur. Heavily influenced by the
[toma](https://github.com/BlackHC/toma) library.
    N)Optional)version   )	is_cuda_availableis_hpu_availableis_ipex_availableis_mlu_availableis_mps_availableis_musa_availableis_npu_availableis_sdaa_availableis_xpu_available)compare_versionsFc                 C   s   | rt   t rtj  dS t rtj  dS t r$tj	  dS t
 r.tj  dS t r8tj  dS tddrDtj  dS t rNtj  dS t rT	 dS dS )z
    Clears the device cache by calling `torch.{backend}.empty_cache`. Can also run `gc.collect()`, but do note that
    this is a *considerable* slowdown and should be used sparingly.
    z2.0)min_versionN)gccollectr   torchxpuempty_cacher   mlur   sdaar
   musar   npur	   mpsr   cudar   garbage_collection r   Y/sda-disk/www/egybert/egybert_env/lib/python3.10/site-packages/accelerate/utils/memory.pyclear_device_cache,   s&   
r   c                  G   s:   t | ts	t| } tt| D ]}d| |< qtdd | S )aN  
    Releases memory from `objects` by setting them to `None` and calls `gc.collect()` and `torch.cuda.empty_cache()`.
    Returned objects should be reassigned to the same variables.

    Args:
        objects (`Iterable`):
            An iterable of objects
    Returns:
        A list of `None` objects to replace `objects`

    Example:

        ```python
        >>> import torch
        >>> from accelerate.utils import release_memory

        >>> a = torch.ones(1000, 1000).cuda()
        >>> b = torch.ones(1000, 1000).cuda()
        >>> a, b = release_memory(a, b)
        ```
    NTr   )
isinstancelistrangelenr   )objectsir   r   r   release_memoryG   s   


r&   	exceptionreturnc                    s:   g d}t  trt jdkrt fdd|D S dS )z
    Checks if `exception` relates to CUDA out-of-memory, XPU out-of-memory, CUDNN not supported, or CPU out-of-memory

    Args:
        exception (`Exception`):
            An exception
    )z out of memory.z(cuDNN error: CUDNN_STATUS_NOT_SUPPORTED.z*DefaultCPUAllocator: can't allocate memoryz1FATAL ERROR :: MODULE:PT_DEVMEM Allocation failedr   c                 3   s    | ]
}| j d  v V  qdS )r   N)args).0errr'   r   r   	<genexpr>t   s    z+should_reduce_batch_size.<locals>.<genexpr>F)r    RuntimeErrorr#   r)   any)r'   _statementsr   r,   r   should_reduce_batch_sizee   s   r1      functionstarting_batch_sizereduce_batch_size_fnc                    sB   du rt jt|dS | du r fdd fdd}|S )a  
    A basic decorator that will try to execute `function`. If it fails from exceptions related to out-of-memory or
    CUDNN, the batch size is multiplied by 0.9 and passed to `function`

    `function` must take in a `batch_size` parameter as its first argument.

    Args:
        function (`callable`, *optional*):
            A function to wrap
        starting_batch_size (`int`, *optional*):
            The batch size to try and fit into memory

    Example:

    ```python
    >>> from accelerate.utils import find_executable_batch_size


    >>> @find_executable_batch_size(starting_batch_size=128)
    ... def train(batch_size, model, optimizer):
    ...     ...


    >>> train(model, optimizer)
    ```
    N)r4   c                      s   t  d   S )Ng?)intr   )
batch_sizer   r   r5      s   z8find_executable_batch_size.<locals>.reduce_batch_size_fnc               
      s   t dd ttj }t|t| d k r>ddd t|dd  | dd  D }t	dj
 dj
 d	| d
	  dkrGtdz g| R i |W S  tys } zt|rht dd   n W Y d }~nd }~ww q?)NTr   r   z, c                 S   s   g | ]\}}| d | qS )=r   )r*   argvaluer   r   r   
<listcomp>   s    zAfind_executable_batch_size.<locals>.decorator.<locals>.<listcomp>zBatch size was passed into `zS` as the first argument when called.Remove this as the decorator already does so: `(z)`r   z-No executable batch size found, reached zero.)r   r!   inspect	signature
parameterskeysr#   joinzip	TypeError__name__r.   	Exceptionr1   )r)   kwargsparamsarg_strer7   r3   r5   r   r   	decorator   s2   
*

z-find_executable_batch_size.<locals>.decorator)	functoolspartialfind_executable_batch_size)r3   r4   r5   rK   r   rJ   r   rN   x   s   rN   device_indexc                 C   s   t tjjt djkr z	tj| d W S  ty   Y nw t r>t t	j
 d}t|ddr>ddlm} || d S td tj| S )Nz2.6r   intel_extension_for_pytorchz>=z2.5)mem_get_infozThe XPU `mem_get_info` API is available in IPEX version >=2.5 or PyTorch >=2.6. The current returned available memory is incorrect. Please consider upgrading your IPEX or PyTorch version.)r   parser   __version__releaser   rQ   rE   r   	importlibmetadatar   intel_extension_for_pytorch.xpuwarningswarnmax_memory_allocated)rO   ipex_versionrQ   r   r   r   get_xpu_available_memory   s   r\   )F)Nr2   N) __doc__rL   r   rU   r=   rX   typingr   r   	packagingr   importsr   r   r   r   r	   r
   r   r   r   versionsr   r   r&   rE   boolr1   callabler6   rN   r\   r   r   r   r   <module>   s2   ,

D