o
    灛iL+                     @   s   d dl mZmZ d dlmZmZmZmZ d dlm	Z	 d dl
mZmZ d dlZd dlZd dlZd dlZd dlZd dlZd dlmZ e	ddG d	d
 d
ZG dd deZdS )    )BaseBackend	GPUTarget)irpassesllvmamd)	dataclass)AnyTupleN)PathT)frozenc                   @   s   e Zd ZU dZeed< dZeed< dZeed< dZeed< dZ	e
ed	< d
Zeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZee ed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dd Zdd  ZdS )!
HIPOptions   	num_warps   waves_per_eur   
num_stagesnum_ctasNextern_libs)r   r   r   cluster_dimsFdebugarchallow_fp8e4nvallow_fp8e4b15ieeedefault_dot_input_precision)r   allowed_dot_input_precisionsTenable_fp_fusionmatrix_instr_nonkdimkpackallow_flush_denormmax_num_imprecise_acc_defaulthipbackend_namec                 C   s   t tjd }| jd u ri nt| j}d| jv sd| jv rdnd}t| d| ddg}|D ]}t|| d	 ||< q.t| d
t	|
  | jdkrV| j| jd @ dksZJ dd S )Nlibgfx10gfx11    @   	warp_sizeocmlocklz.bcr   r   r   znum_warps must be a power of 2)r   __file__parentr   dictr   object__setattr__strtupleitemsr   )selfdefault_libdirr   r)   libsr$    r7   ^/sda-disk/www/egybert/egybert_env/lib/python3.10/site-packages/triton/backends/amd/compiler.py__post_init__#   s    zHIPOptions.__post_init__c                 C   s.   d dd | j D }t|d S )N_c                 S   s   g | ]\}}| d | qS )-r7   ).0namevalr7   r7   r8   
<listcomp>1       z#HIPOptions.hash.<locals>.<listcomp>utf-8)join__dict__r3   hashlibsha256encode	hexdigest)r4   keyr7   r7   r8   hash0   s   zHIPOptions.hash)__name__
__module____qualname__r   int__annotations__r   r   r   r   r.   r   r2   r   boolr   r1   r   r   r   r   r
   r   r   r   r    r!   r#   r9   rI   r7   r7   r7   r8   r      s*   
 r   c                       s   e Zd ZedefddZdeddf fddZdefdd	Zd
d Z	dd Z
dd Zedd Zedd Zedd Zedd Zedd Zedd Zdd Ze dd Z  ZS ) 
HIPBackendtargetc                 C   s
   | j dkS )Nr"   )backend)rQ   r7   r7   r8   supports_target7   s   
zHIPBackend.supports_targetreturnNc                    s&   t  | t|jtsJ d| _d S )Nhsaco)super__init__
isinstancer   r1   
binary_ext)r4   rQ   	__class__r7   r8   rW   ;   s   
zHIPBackend.__init__c                    s8   d| j ji}| fddtj D  tdi |S )Nr   c                    s   i | ]}| v r| | qS r7   r7   )r<   koptsr7   r8   
<dictcomp>B   r@   z,HIPBackend.parse_options.<locals>.<dictcomp>r7   )rQ   r   updater   __dataclass_fields__keys)r4   r^   argsr7   r]   r8   parse_options@   s   zHIPBackend.parse_optionsc                 C   s(   |j |j|j|jd |jd |jd fS )Nr   r      )r   r   sharedr   )r4   metadatar7   r7   r8   pack_metadataE   s   zHIPBackend.pack_metadatac                 C   s
   t  }|S N)r.   )r4   codegen_fnsr7   r7   r8   get_codegen_implementationO   s   z%HIPBackend.get_codegen_implementationc                 C   s   t | d S ri   )r   load_dialects)r4   ctxr7   r7   r8   rl   S   s   zHIPBackend.load_dialectsc                  C   sp   t d} | d urt| }| r|S ttjd }| r |S td}| r*|S td}| r4|S td)NTRITON_HIP_LLD_PATHzllvm/bin/ld.lldz/opt/rocm/llvm/bin/ld.lldz/usr/bin/ld.lldz/ROCm linker /opt/rocm/llvm/bin/ld.lld not found)osgetenvr   is_filer,   r-   	Exception)lld_env_pathlldr7   r7   r8   path_to_rocm_lldV   s   
zHIPBackend.path_to_rocm_lldc                 C   s   t | j}|  tj| tj| tj	| tj
| tj| tj| tj| tj| ||  | S ri   )r   pass_managercontextenable_debugr   commonadd_inlinerttiradd_rewrite_tensor_pointeradd_combineadd_canonicalizeradd_reorder_broadcastadd_cseadd_licmadd_symbol_dcerunmodrg   optionspmr7   r7   r8   	make_ttirj   s   
zHIPBackend.make_ttirc                 C   sN  t | j}|  tj|d|j |j|j	|j
 ||  t | j}|  tj| tj| tj| tjj||j|j|j tj| tjj| tj|d |jdkrut|jrutjj| tj| tj|d tj| tj| |jdkrtjj| tj| tj| ||  | S )Nzhip:Tr   )r   rv   rw   rx   r   r{   add_convert_to_ttgpuirr   r   r)   r   r   ttgpuiradd_coalesceadd_remove_layout_conversionsadd_optimize_thread_localityr   add_accelerate_matmulr   r   add_optimize_epilogueadd_optimize_dot_operandsr   has_matrix_core_featureadd_stream_pipelinery   r~   add_reduce_data_duplicationadd_reorder_instructionsr   r   r   r7   r7   r8   
make_ttgiry   s6   


zHIPBackend.make_ttgirc           
         s.  | }t |j}|  tjj||j tj	
| tj	| tj| d}tjj||j| tj| tj| tj	| tj	| tj| tj| tj| tjdddkrltj| tjj| || t  t }t|| t |j t d t  dd t  dd t  dd t  d	|j!d
k dd  " D }|d #tj$ |d %dd|j&|j!   |d %d|j'  |j(rdnd}|d %d| |j)r fdd|j)D }	t* |	 t+ tj,tj- | .d|d< t/  t0 S )NTTRITON_DISABLE_LINE_INFO0i  __oclc_finite_only_optF__oclc_correctly_rounded_sqrt32__oclc_unsafe_math_opt__oclc_wavefrontsize64r(   c                 S   s   g | ]}|  s|qS r7   )is_declaration)r<   fnr7   r7   r8   r?      s    z(HIPBackend.make_llir.<locals>.<listcomp>r   zamdgpu-flat-work-group-sizez1,zamdgpu-waves-per-euzpreserve-signr   zdenormal-fp-math-f32c                    s    g | ]\}}t  |r|qS r7   )r   need_extern_lib)r<   r=   pathllvm_modr7   r8   r?      s     ztriton_gpu.sharedrf   )1r   rv   rw   rx   r   r   r   %add_decompose_unsupported_conversionsr   convertadd_scf_to_cfadd_index_to_llvmiradd_allocate_shared_memoryadd_to_llvmirry   r~   r   add_cf_to_llvmiradd_arith_to_llvmirr   ro   environgetllvmiradd_di_scopeadd_builtin_func_to_llvmirr   r   init_targets	to_moduleset_isa_versionset_abi_versionset_bool_control_constantr)   get_functionsset_calling_convCALLING_CONV_AMDGPU_KERNELadd_fn_attrr   r   r    r   link_extern_libsoptimize_moduleOPTIMIZE_O3TARGET_TRIPLEget_int_attrcleanup_bitcode_metadatar1   )
srcrg   r   r   r   _HIPBackend__HIP_FTZrw   fnsdenormal_modepathsr7   r   r8   	make_llir   sT   

zHIPBackend.make_llirc              	   C   sj   t d| }t|dksJ |d |d< t| tj|jdg |jd}t	j
ddd	kr3td
 t| |S )Nz3define amdgpu_kernel void @([a-zA-Z_][a-zA-Z0-9_]*)r   r   r=    FAMDGCN_ENABLE_DUMPr   1z!// -----// AMDGCN Dump //----- //)refindalllenr   translate_to_asmr   r   r   r   ro   r   r   print)r   rg   r   namesamdgcnr7   r7   r8   make_amdgcn   s   zHIPBackend.make_amdgcnc           
      C   s  t | |jd}t }t h}t 1}t|jd}|	| W d    n1 s,w   Y  t
|ddd|jd|jg W d    n1 sIw   Y  t|jd}| }	W d    n1 scw   Y  W d    |	S W d    |	S 1 s{w   Y  |	S )Nr   wbz-flavorgnuz-sharedz-orb)r   assemble_amdgcnr   rP   ru   tempfileNamedTemporaryFileopenr=   write
subprocess
check_callread)
r   rg   r   rU   	rocm_pathtmp_outtmp_infd_infd_outretr7   r7   r8   
make_hsaco   s&   




zHIPBackend.make_hsacoc                    s^    fdd|d<  fdd|d<  fdd|d<  fdd|d	<  fd
d|d< d S )Nc                        | | S ri   )r   r   rg   r   r4   r7   r8   <lambda>       z'HIPBackend.add_stages.<locals>.<lambda>r{   c                    r   ri   )r   r   r   r7   r8   r      r   ttgirc                    r   ri   )r   r   r   r7   r8   r      r   llirc                    r   ri   )r   r   r   r7   r8   r      r   r   c                    r   ri   )r   r   r   r7   r8   r     r   rU   r7   )r4   stagesr   r7   r   r8   
add_stages   s
   zHIPBackend.add_stagesc                 C   s&   t jt dgdd}| d| j S )Nz	--versionrA   )encodingr;   )r   check_outputrP   ru   rQ   )r4   versionr7   r7   r8   rI     s   zHIPBackend.hash)rJ   rK   rL   staticmethodr   rS   rW   r	   rd   rh   rk   rl   ru   r   r   r   r   r   r   	functools	lru_cacherI   __classcell__r7   r7   rZ   r8   rP   5   s.    




H

rP   )triton.backends.compilerr   r   triton._C.libtritonr   r   r   r   dataclassesr   typingr	   r
   rD   r   ro   r   r   r   pathlibr   r   rP   r7   r7   r7   r8   <module>   s    &