o
    i                     @   sv  d dl Z d dlZd dlmZ d dlmZmZ d dlZg dZ	d Z
dd Zdd Zd	d
 Zdd Zdeejeej f ddfddZde
ddfddZdd e
ddfdddeej deeejeej f  dededeeejj  deeej  ddfddZ	d!deej deddfddZ		d"deej deej ddfddZe
ddfdeej deej deddfdd ZdS )#    N)Sequence)OptionalUnion)
all_reducereduce	broadcast
all_gatherreduce_scatterc                 C   sx   t tjdstjddd dS t }| D ]%}|jr dS | s# dS |js) dS |	 }||v r4 dS |
| qdS )N_nccl_all_reducez)PyTorch is not compiled with NCCL support   
stacklevelFT)hasattrtorch_Cwarningswarnset	is_sparseis_contiguousis_cuda
get_deviceadd)tensorsdevicestensordevice r   Q/sda-disk/www/egybert/egybert_env/lib/python3.10/site-packages/torch/cuda/nccl.pyis_available   s    r   c                  C   sT   t j } | d? }| d? d@ }| d@ }t j d}|dkr$|||fS ||||fS )a  
    Returns the version of the NCCL.


    This function returns a tuple containing the major, minor, and patch version numbers of the NCCL.
    The suffix is also included in the tuple if a version suffix exists.
    Returns:
        tuple: The version information of the NCCL.
           i  zutf-8 )r   r   _nccl_version_nccl_version_suffixdecode)vermajorminorpatchsuffixr   r   r   version$   s   


r+   c                   C   s
   t j S N)r   r   _nccl_unique_idr   r   r   r   	unique_id9   s   
r.   c                 C   s   t j| ||S r,   )r   r   _nccl_init_rank)	num_ranksuidrankr   r   r   	init_rank=   s   r3   inputsreturnc                 C   s&   t | tjjrt | tjrtdd S )Nz(Inputs should be a collection of tensors)
isinstancecollectionsabc	Containerr   Tensor	TypeError)r4   r   r   r   _check_sequence_typeA   s
   r<   c                 C   s4   t |  |d u r
| }t | tj| |||| d S r,   )r<   r   r   r
   r4   outputsopstreamscommsr   r   r   r   H   s
   r   )r>   outputrootr?   r@   r>   c                C   s   t |  |d ur|d urtdtjdtdd || }n$t|tjs7t|tj	j
r7tjdtdd || }n
|d u r?| | n|}tj| ||||| d S )Nz'output' and 'outputs' can not be both specified. 'outputs' is deprecated in favor of 'output', taking in a single output tensor. The signature of reduce is: reduce(inputs, output=None, root=0, op=SUM, streams=None, comms=None).z`nccl.reduce` with an output tensor list is deprecated. Please specify a single output tensor with argument 'output' instead instead.r   r   z\nccl.reduce with an output tensor list is deprecated. Please specify a single output tensor.)r<   
ValueErrorr   r   FutureWarningr6   r   r:   r7   r8   r   r   _nccl_reduce)r4   rB   rC   r?   r@   rA   r>   _outputr   r   r   r   R   s.   


r   c                 C   s   t |  tj| ||| d S r,   )r<   r   r   _nccl_broadcast)r4   rC   r@   rA   r   r   r   r   }   s   r   c                 C   s&   t |  t | tj| ||| d S r,   )r<   r   r   _nccl_all_gather)r4   r>   r@   rA   r   r   r   r      s   r   c                 C   s(   t |  t | tj| |||| d S r,   )r<   r   r   _nccl_reduce_scatterr=   r   r   r   r	      s   r	   )r   NN)NN)r7   r   collections.abcr   typingr   r   
torch.cudar   __all__SUMr   r+   r.   r3   r:   r<   r   intcudaStreamr   r   r   r	   r   r   r   r   <module>   s   "	
,


