o
    0i                     @   s8  d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
 d dlZddlmZ ddlmZmZmZmZmZmZmZmZmZmZmZmZ ddlmZ dd	lmZmZm Z  dd
l!m"Z" ddl#m$Z$m%Z% g fddZ&dd Z'de j(de)e*e*f fddZ+de j(de,e-e* e)e*e*f f fddZ.de j(de)e*e*f fddZ/de j(de)de,e-e* e)e*e*f f fddZ0de j(de,e-e* e)e*e*f f fddZ1	d,de j(de)e*e*f de2de,e j(e)e*e*f f fd d!Z3d"e-e* de)e*e*f fd#d$Z4d%ede j(de,e j(e)e*e
f f fd&d'Z5d(d) Z6G d*d+ d+Z7dS )-    Nliteral_evalwhich)Any   )SageMakerConfig)DynamoBackendPrecisionTypeis_ccl_availableis_fp8_availableis_hpu_availableis_ipex_availableis_mlu_availableis_musa_availableis_npu_availableis_sdaa_availableis_torch_xla_availableis_xpu_available)DEEPSPEED_MULTINODE_LAUNCHERS)get_free_portis_port_in_usemerge_dicts)compare_versions   )DistributedTypeSageMakerDistributedTypec                 C   sD   | |\}}t|  D ]\}}|t| v rt||| q|S )z4
    Filters out all `accelerate` specific args
    )parse_known_argsvarsitemskeyssetattr)argsparserdefault_argsnew_args_keyvalue r)   Y/sda-disk/www/egybert/egybert_env/lib/python3.10/site-packages/accelerate/utils/launch.py_filter_args/   s   r+   c                  C   s\   dd dD } t | dkrtd| d }t|dg}d|v r'|dd	d
dfS |dd	ddfS )a  
    Determines the executable and argument names for mpirun, based on the type of install. The supported MPI programs
    are: OpenMPI, Intel MPI, or MVAPICH.

    Returns: Program name and arg names for hostfile, num processes, and processes per node
    c                 S   s   g | ]}t |r|qS r)   r   ).0xr)   r)   r*   
<listcomp>B   s    z$_get_mpirun_args.<locals>.<listcomp>)mpirunmpiexecr   z\mpirun or mpiexec were not found. Ensure that Intel MPI, Open MPI, or MVAPICH are installed.z	--versions   Open MPI
--hostfilez-nz
--npernodez	--bind-toz-fz-ppn )lenOSError
subprocesscheck_output)mpi_appsmpi_appmpirun_versionr)   r)   r*   _get_mpirun_args:   s   r:   r"   current_envc                 C   s   d}t | D ]B}|drHt| |}|durH|dkr9t|d ||d < t|d ||d < t|d	 ||d
 < qtt| ||| |  < q|S )z.
    Setup the FP8 environment variables.
    ACCELERATE_fp8_Nfp8_override_linear_precisionr   FP8_OVERRIDE_FPROPr   FP8_OVERRIDE_DGRADr   FP8_OVERRIDE_WGRAD)r   
startswithgetattrstrupper)r"   r;   prefixargr(   r)   r)   r*   setup_fp8_envR   s   

rH   returnc              	   C   s  g }| j r| jrtdt| dd}| j}| jdurPt \}}}}}t| dd}	|r2|r2t|| nd}
|||| j||
g7 }|rH||t|g7 }|rP|||	g7 }| j sa|t	j
 | jra|d || j || j tj }t| jpx| j|d< | jrd	|d
< | jdkr| jdurt r| j|d< n2t r| j|d< n)t r| j|d< n t r| j|d< nt r| j|d< nt r| j|d< n| j|d< |dkr| jdusJ d| jdusJ dt rt| ddnd}|dur|dks|dkr| jdur| jnd|d< | jdurt| jnd|d< t||d< |d r&d|d< td|d< z	t| j  }W n tyG   td | j   d!t!  d"w t||d#< | j  d$krct" s^t#d%t$| |}z	t%| j&' }W n ty   td&| j&'  d!t%!  d"w |j(|d'< | j)|d(< t| j*|d)< t| j+|d*< t| j,|d+< t| j-|d,< t. rt| j/  |d-< | j0rd|d.< ||fS )/zz
    Prepares and returns the command list and an environment with the correct simple launcher environment variables.
    0--module and --no_python cannot be used togethernum_processesNzbind-tosocket1z-mACCELERATE_USE_CPUtrueACCELERATE_DEBUG_MODEallZE_AFFINITY_MASKMLU_VISIBLE_DEVICESSDAA_VISIBLE_DEVICESMUSA_VISIBLE_DEVICESASCEND_RT_VISIBLE_DEVICESHABANA_VISIBLE_MODULESCUDA_VISIBLE_DEVICESr   zFWhen using multiple machines, you need to specify the main process IP.zHWhen using multiple machines, you need to specify the main process port.
mpirun_cclr   z	127.0.0.1MASTER_ADDR29500MASTER_PORTCCL_WORKER_COUNTzgranularity=fine,compact,1,0KMP_AFFINITYKMP_BLOCKTIMEUnknown mixed_precision mode: . Choose between .ACCELERATE_MIXED_PRECISIONfp8rFP8 is not available on this machine. Please ensure that either Transformer Engine, MSAMP or torchao is installed.Unknown dynamo backend: ACCELERATE_DYNAMO_BACKENDACCELERATE_DYNAMO_MODEACCELERATE_DYNAMO_USE_FULLGRAPHACCELERATE_DYNAMO_USE_DYNAMIC*ACCELERATE_DYNAMO_USE_REGIONAL_COMPILATIONOMP_NUM_THREADSACCELERATE_USE_IPEXACCELERATE_CPU_AFFINITY)1	no_pythonmodule
ValueErrorrC   num_machinesmpirun_hostfiler:   rD   appendsys
executabletraining_scriptextendtraining_script_argsosenvironcopycpuuse_cpudebuggpu_idsr   r   r   r   r   r   main_process_ipmain_process_portr   r
   mixed_precisionlowerlistr   RuntimeErrorrH   r	   dynamo_backendrE   r(   dynamo_modedynamo_use_fullgraphdynamo_use_dynamicdynamo_use_regional_compilationnum_cpu_threads_per_processr   ipexenable_cpu_affinity)r"   cmdrK   rr   mpi_app_namehostfile_argnum_proc_argproc_per_node_argbind_to_argbind_tonproc_per_noder;   ccl_worker_countr   r   r)   r)   r*   prepare_simple_launcher_cmd_envd   s   







r   c              	   C   s0  | j dkr
t | _ n| j du rd| _ | j}| j}| j}| j }|dkrNt|| | _t|| _t| j	| _
t| ddrEt|| _t|| _n| d| | _nt|| _|dur\t|| _|dkpft| j	dk}|rt|r|dkr~d| _td	| d
 ntd| d| jr| jrtd| jrd| _n| jrd| _tj }| jrd|d< t| dd}|dkr| jdurt r||d< n,t r||d< n$t r||d< nt r||d< nt  r||d< nt! r||d< n||d< | j"# }zt$|}W n ty   td| dt$%  dw t||d< | j"# dkr(t& s#t'dt(| |}z	t)| j*+ }	W n tyI   td| j*+  dt)%  dw |	j,|d < | j-|d!< t| j.|d"< t| j/|d#< t| j0|d$< | j1rd|d%< | j2r}| j3s}td&t4| d'rt| j5nd(|d)< t| j6|d*< t| j7# |d+< t| j8# |d,< t| j9|d-< | j:durt| j:|d.< | j;durt| j;|d/< | j<durt| j<|d0< | j=durt| j=|d1< t| j># |d2< t| j?# |d3< t| j2# |d4< t| j3# |d5< t| j@# |d6< t| d7ddurt| jA|d8< | jBr~d9}
d|d:< t| jC||
d; < t| jD||
d< < t| jE||
d= < | jFdurQt| jF||
d> < | jGdur`t| jG||
d? < | jHdurot| jH||
d@ < | jIdur~t| jI||
dA < t| jJ|dB< | jKrd(|dC< | jLrtM| |}|S )Dz_
    Prepares and returns an environment with the correct multi-GPU environment variables.
    r   N<s  r   same_networkF:TPort `  ` is already in use. Accelerate will attempt to launch in a standalone-like mode by finding an open port automatically for this session. If this current attempt fails, or for more control in future runs, please specify a different port (e.g., `--main_process_port <your_chosen_port>`) or use `--main_process_port 0` for automatic selection in your launch command or Accelerate config file.3Tried to launch distributed communication on port `$  `, but another process is utilizing it. Please specify a different port (such as using the `--main_process_port` flag or specifying a different `main_process_port` in your config file) and rerun your script. To automatically use the next open port (on a single node), you can set this to `0`.rJ   rO   rP   r   rQ   rR   rS   rT   rU   rV   rW   rX   r`   ra   rb   rc   rd   re   rf   rg   rh   ri   rj   rk   ACCELERATE_USE_FSDPzWWhen using `--fsdp_cpu_ram_efficient_loading` set `--fsdp_sync_module_states` to `True`fsdp_versionrM   FSDP_VERSIONFSDP_SHARDING_STRATEGYFSDP_RESHARD_AFTER_FORWARDFSDP_OFFLOAD_PARAMSFSDP_MIN_NUM_PARAMSFSDP_AUTO_WRAP_POLICYFSDP_TRANSFORMER_CLS_TO_WRAPFSDP_BACKWARD_PREFETCHFSDP_STATE_DICT_TYPEFSDP_FORWARD_PREFETCHFSDP_USE_ORIG_PARAMSFSDP_CPU_RAM_EFFICIENT_LOADINGFSDP_SYNC_MODULE_STATESFSDP_ACTIVATION_CHECKPOINTINGfsdp_ignored_modulesFSDP_IGNORED_MODULESMEGATRON_LM_ACCELERATE_USE_MEGATRON_LM	TP_DEGREE	PP_DEGREEGRADIENT_CLIPPINGNUM_MICRO_BATCHESSEQUENCE_PARALLELISMRECOMPUTE_ACTIVATIONSUSE_DISTRIBUTED_OPTIMIZERrl   rn   )Nr   r   rK   rr   r   rD   r   nnodesintmachine_rank	node_rankrC   master_addrmaster_portrdzv_endpointr   
standalonewarningswarnConnectionErrorrp   ro   rq   rz   r{   r|   r   r   r   r   r   r   r   r   r   r   r
   r   r   r   rH   r	   r   rE   r(   r   r   r   r   use_fsdpfsdp_cpu_ram_efficient_loadingfsdp_sync_module_stateshasattrr   fsdp_sharding_strategyfsdp_reshard_after_forwardfsdp_offload_paramsfsdp_min_num_paramsfsdp_auto_wrap_policy"fsdp_transformer_layer_cls_to_wrapfsdp_backward_prefetchfsdp_state_dict_typefsdp_forward_prefetchfsdp_use_orig_paramsfsdp_activation_checkpointingr   use_megatron_lmmegatron_lm_tp_degreemegatron_lm_pp_degreemegatron_lm_gradient_clippingmegatron_lm_num_micro_batches megatron_lm_sequence_parallelism!megatron_lm_recompute_activations%megatron_lm_use_distributed_optimizerr   r   use_parallelism_config%prepare_extend_env_parallelism_config)r"   rK   rr   r   r   need_port_checkr;   r   r   r   rF   r)   r)   r*   prepare_multi_gpu_env   s   




















r   c                 C   s   d}d|d< t | j||d < t | j||d < t | j||d < t | j||d < t | j||d < t | j||d	 < t | j||d
 < | jdkrSt | j||d < | jdkrst | j	||d < t | j
||d < t | j||d < |S )zV
    Extends `current_env` with context parallelism env vars if any have been set
    PARALLELISM_CONFIG_rO   !ACCELERATE_USE_PARALLELISM_CONFIGDP_REPLICATE_SIZEDP_SHARD_SIZETP_SIZECP_SIZE
CP_BACKENDSP_SIZE
SP_BACKENDr   CP_COMM_STRATEGYSP_SEQ_LENGTHSP_SEQ_LENGTH_IS_VARIABLESP_ATTN_IMPLEMENTATION)rD   $parallelism_config_dp_replicate_size parallelism_config_dp_shard_sizeparallelism_config_tp_sizeparallelism_config_cp_sizeparallelism_config_cp_backendparallelism_config_sp_sizeparallelism_config_sp_backend#parallelism_config_cp_comm_strategy parallelism_config_sp_seq_length,parallelism_config_sp_seq_length_is_variable)parallelism_config_sp_attn_implementation)r"   r;   rF   r)   r)   r*   r   g  s    

r   c           
   	   C   sH  | j dkr
t | _ n| j du rd| _ | j}| j}| j}| j }d}| jdu r*td | _|dkr| jtd krdg}|dt| j	g | jdkr]t
ddd	rQtd
|dt| jdg n|ddt| jg | jdurx|dt| jg n| jdur|dt| jg n|dt| j| j g |r|dt|g |dt|g | jr| jrtd| jr|d n| jr|d || j || j nI|dkr| jtd krt|| | _t|| _t| j| _t| ddrt|| _t|| _n| d| | _nt|| _|durt|| _|dkp%t| jdk}|rHt|rH|dkr@d| _td| d nt d| d| jrT| jrTtd| jr\d| _n| jrcd| _t!j"# }| j$rpd|d < t| d!d"}|d"kr| j%durt& r||d#< n1t' r||d$< n(t( r||d%< nt) r||d&< nt* r||d'< nt+ r||d(< n||d)< z	t,| j-. }	W n ty   td*| j-.  d+t,/  d,w t0d-t!j12d,|d-< t|	|d.< | j-. d/krt3 st4d0t5| |}t| j6. |d1< d|d2< | j7durt| j7|d3< | j8dur*t| j8|d4< | j9dur9t| j9. |d5< | j:durHt| j:. |d6< | j;durWt| j;. |d7< | j<durft| j<. |d8< | j=durut| j=. |d9< | j>durt| j>|d:< | j?rd;|d<< | j@durt| j@|d=< | jArtB| |}||fS )>zt
    Prepares and returns the command list and an environment with the correct DeepSpeed environment variables.
    r   Nr   r   	deepspeedr1   nossh<z0.14.5z+nossh launcher requires DeepSpeed >= 0.14.5z--node_rankz--no_sshz--no_local_rankz
--launcherz	--excludez	--includez
--num_gpusz--master_addrz--master_portrJ   z--modulez--no_pythonr   Fr   Tr   r   r   r   rO   rP   r   rQ   rR   rS   rT   rU   rV   rW   rX   r`   ra   rb   
PYTHONPATHrc   rd   re   ACCELERATE_CONFIG_DS_FIELDSACCELERATE_USE_DEEPSPEEDACCELERATE_DEEPSPEED_ZERO_STAGE&ACCELERATE_GRADIENT_ACCUMULATION_STEPSACCELERATE_GRADIENT_CLIPPING-ACCELERATE_DEEPSPEED_OFFLOAD_OPTIMIZER_DEVICE)ACCELERATE_DEEPSPEED_OFFLOAD_PARAM_DEVICEACCELERATE_DEEPSPEED_ZERO3_INIT+ACCELERATE_DEEPSPEED_ZERO3_SAVE_16BIT_MODEL ACCELERATE_DEEPSPEED_CONFIG_FILErM   rn   (ACCELERATE_DEEPSPEED_MOE_LAYER_CLS_NAMES)Cr   r   rK   rr   r   deepspeed_multinode_launcherr   rx   rD   deepspeed_hostfiler   rq   r   deepspeed_exclusion_filterdeepspeed_inclusion_filterrp   ro   rt   rw   ry   r   r   r   r   rC   r   r   r   r   r   r   r   r   rz   r{   r|   r   r   r   r   r   r   r   r   r
   r   r   r   env_var_path_addpathabspathr   r   rH   'deepspeed_fields_from_accelerate_config
zero_stagegradient_accumulation_stepsgradient_clippingoffload_optimizer_deviceoffload_param_devicezero3_init_flagzero3_save_16bit_modeldeepspeed_config_filer   deepspeed_moe_layer_cls_namesr   r   )
r"   rK   rr   r   r   r   r   r;   r   r   r)   r)   r*   prepare_deepspeed_cmd_env  s   

























r  Fpodc                 C   sV   | j dkrtddr| jrd|d< nd|d< | jrd|d< |r'| j| _| j| _| |fS )	zY
    Prepares and returns an environment with the correct TPU environment variables.
    bf16T)check_is_tpurM   XLA_DOWNCAST_BF16XLA_USE_BF16rO   rP   )r   r   downcast_bf16r   tpu_vmvmtpu_nametpu)r"   r;   r  r)   r)   r*   prepare_tpu$  s   
r$  nargsc                 C   s   t | dk ri S dd }t }|| \}}t|D ]8\}}|drSd }|d t |k r<||d  dr;tdntd|d u rL|j||d q|j||d qd	d
 || j	
 D S )Nr   c                 S   s<   zt | } | d | krt| W S | W S  ty   |  Y S w )Nr   )floatr   rq   )sr)   r)   r*   _infer_type=  s   
z+_convert_nargs_to_dict.<locals>._infer_type)-z--r   ul   SageMaker doesn’t support argparse actions for `store_true` or `store_false`. Please define explicit types)type)actionc                 S   s&   i | ]\}}||d v rt |n|qS ))TrueFalser   )r,   r'   r(   r)   r)   r*   
<dictcomp>\  s    z*_convert_nargs_to_dict.<locals>.<dictcomp>)r3   argparseArgumentParserr   	enumeraterB   rq   add_argument
parse_args__dict__r   )r%  r(  r#   r&   unknownindexargumentr+  r)   r)   r*   _convert_nargs_to_dict8  s0   

r8  sagemaker_configc                 C   sR  t d | jtjd< | jd ur| jtjd< n|jd ur-|jd ur-|jtjd< |jtjd< ntdtj	|j
}|s<d}tj|j
}|dsPtd	| d
t d t|j}z	t|j }W n tyy   td|j  dt  dw z	t|j }W n ty   td|j  dt  dw dt||j|jt|jt|jt|j| jjd}|j dkrt stdt ||}d }| jt!j"krddddiii}d }	| j#d ur%t d| j# d i }	t$| j#%}
t%|
D ]\}}|dkrq|&d}|d ' |	|d < qW d    n	1 sw   Y  t d|	  d }| j(d ur{t d| j( d g }t$| j(.}
t%|
D ]!\}}|dkrMqB|&d}|d |d ' d}|)| qBW d    n	1 sow   Y  t d|  t d  | j*||| j+| j,| j-| j.| j/| j0| j1d!||||d"}| j2d urt3| j2|}||	fS )#Nz(Configuring Amazon SageMaker environmentAWS_DEFAULT_REGIONAWS_PROFILEAWS_ACCESS_KEY_IDAWS_SECRET_ACCESS_KEYz]You need to provide an aws_access_key_id and aws_secret_access_key when not using aws_profilerb   z.pyz8Your training script should be a python script and not ""z'Converting Arguments to Hyperparametersr`   ra   rf   rO   )ACCELERATE_USE_SAGEMAKERrc   rg   rh   ri   rj   rk   %ACCELERATE_SAGEMAKER_DISTRIBUTED_TYPErd   re   smdistributeddataparallelenabledTzLoading SageMaker Inputs from z filer   	r   zLoaded SageMaker Inputs: zLoading SageMaker Metrics from )NameRegexzLoaded SageMaker Metrics: zCreating EstimatorF)	image_urientry_point
source_dirroletransformers_versionpytorch_version
py_versionbase_job_nameinstance_countinstance_typedebugger_hook_configdistributionhyperparametersenvironmentmetric_definitions)4printregionrz   r{   profileaws_access_key_idaws_secret_access_keyr4   r  dirnamerw   basenameendswithrq   r8  ry   r
   r   r   r   r	   r   rE   rD   r(   r   r   r   r   distributed_typer   r   rH   r   DATA_PARALLELsagemaker_inputs_fileopenr1  splitstripsagemaker_metrics_filert   rG  iam_role_namerK  rL  rM  rN  rr   ec2_instance_typeadditional_argsr   )r9  r"   rI  rH  rS  r   r   rT  rR  sagemaker_inputsfileilinelsagemaker_metricsmetric_dictr)   r)   r*   prepare_sagemager_args_inputsb  s   









ro  c                 C   s6   dd t j| ddD }|t| d|S )z
    Extends a path-based environment variable's value with a new path and returns the updated value. It's up to the
    caller to set it in os.environ.
    c                 S   s   g | ]
}t |d kr|qS )r   )r3   )r,   pr)   r)   r*   r.     s    z$env_var_path_add.<locals>.<listcomp>r2   r   )rz   r{   getrb  rt   rD   join)env_var_namepath_to_addpathsr)   r)   r*   r    s   
r  c                   @   s"   e Zd ZdZd	ddZdd ZdS )
PrepareForLaunchai  
    Prepare a function that will launched in a distributed setup.

    Args:
        launcher (`Callable`):
            The function to launch.
        distributed_type ([`~state.DistributedType`]):
            The distributed type to prepare for.
        debug (`bool`, *optional*, defaults to `False`):
            Whether or not this is a debug launch.
    NOFc                 C   s   || _ t|| _|| _d S )N)launcherr   r^  r   )selfrx  r^  r   r)   r)   r*   __init__  s   

zPrepareForLaunch.__init__c                 G   s   | j r!ttjd}tjd}tjjd|tj|||d n5| j	t
jt
jt
jt
jt
jt
jfv rVt|tjd< ttjdd}ttjdd	}t|| | tjd
< tdtjd< | j|  d S )N
WORLD_SIZEACCELERATE_DEBUG_RDV_FILEgloo)rankstore
world_size
LOCAL_RANKNPROCr   	NODE_RANKr   RANKFORK_LAUNCHED)r   r   rz   r{   rq  torchdistributedinit_process_group	FileStorer^  r   	MULTI_GPU	MULTI_MLU
MULTI_MUSA	MULTI_NPU	MULTI_XPU	MULTI_CPUrD   rx  )ry  r6  r"   r  rdv_filenprocr   r)   r)   r*   __call__  s.   	zPrepareForLaunch.__call__N)rw  F)__name__
__module____qualname____doc__rz  r  r)   r)   r)   r*   rv    s    
rv  )F)8r/  rz   r5   ru   r   astr   shutilr   typingr   r  commands.config.config_argsr   utilsr	   r
   r   r   r   r   r   r   r   r   r   r   utils.constantsr   utils.otherr   r   r   utils.versionsr   dataclassesr   r   r+   r:   	NamespacedictrD   rH   tupler   r   r   r   r  boolr$  r8  ro  r  rv  r)   r)   r)   r*   <module>   sd   8(g 
( $

*
v
