o
    灛i@                     @  st  d dl mZ d dlZd dlZddlmZmZ ddlmZ ddlm	Z	 ddl
mZ ddlmZ dd	lmZmZmZ dd
lmZ d dlmZ ddlmZ d dlmZ d dlZd dlZd dlZeG dd dZdZdZeeedZ dZ!dZ"e!e!e"dZ#dd Z$d0ddZ%G dd dZ&G dd dZ'e( d d! Z)d"d# Z*d1d&d'Z+d2d(d)Z,d*d+ Z-G d,d- d-Z.G d.d/ d/Z/dS )3    )annotationsN   )get_cache_invalidating_env_varsir)backends)	GPUTarget)__version__)OutOfResources)get_cache_managerget_dump_managerget_override_manager)driver)	dataclass   )ast_to_ttir)Pathc                   @  sJ   e Zd ZU dZded< dZded< dd Zdd Zed	d
 Z	dd Z
dS )AttrsDescriptorNsetdivisible_by_16
equal_to_1c                 C  s,   | j d u r	t | _ | jd u rt | _d S d S N)r   r   r   self r   Z/sda-disk/www/egybert/egybert_env/lib/python3.10/site-packages/triton/compiler/compiler.py__post_init__   s
   

zAttrsDescriptor.__post_init__c                 C  s   t | jt | jdS )Nr   r   )listr   r   r   r   r   r   to_dict      zAttrsDescriptor.to_dictc                 C  s$   t t| dg t| dg dS )Nr   r   r   )r   r   get)datar   r   r   	from_dict"   s   zAttrsDescriptor.from_dictc                 C  s,   t dd | j D }t|d S )Nc                 S  s   g | ]}t |qS r   )sorted.0xr   r   r   
<listcomp>(       z(AttrsDescriptor.hash.<locals>.<listcomp>utf-8)str__dict__valueshashlibsha256encode	hexdigest)r   keyr   r   r   hash'   s   zAttrsDescriptor.hash)__name__
__module____qualname__r   __annotations__r   r   r   staticmethodr"   r2   r   r   r   r   r      s   
 
r   z^\s*tt\.func\s+(?:public\s+)?(@\w+)(\((?:%\w+: [\S\s]+(?: \{\S+ = \S+ : \S+\})?(?:, )?)*\))\s*(attributes \{[\S\s]+\})?\s+\{\s*$z=\.(?:visible|extern)\s+\.(?:entry|func)\s+(\w+)\s*\(([^)]*)\))ttirttgirptxz %\w+: ((?:[^,\s<)]+|<[^>]+>)+),?z\.param\s+\.(\w+)c                 C  s*   t d| }|d urdt|d S | S )Nz!tt\.ptr<([^,]+)*r   )researchconvert_type_reprgroup)r&   matchr   r   r   r>   E   s   r>   srcr*   c                 C  s4   d}t || }t|dksJ dt|d }|S )Nz&"triton_gpu.num-warps"\s?=\s?(\d+)\s?:r   z(Expected exactly one match for num_warpsr   )r<   findalllenint)rA   ttgir_num_warps_patternnum_warps_matches	num_warpsr   r   r   _get_num_warps_from_ir_strN   s
   rH   c                   @  s0   e Zd ZddddZdd Zdd	 Zd
d ZdS )	ASTSourceNreturnNonec                 C  sz   || _ d| _|j| _|| _|| _|| _t| jtr'dd t	| j
dD | _| jd u r0t | _| jd u r;t | _d S d S )Nr8   c                 S  s   i | ]	\}}||  qS r   )stripr%   kvr   r   r   
<dictcomp>b       z&ASTSource.__init__.<locals>.<dictcomp>,)fnextr3   name	signature	constantsattrs
isinstancer*   	enumeratesplitdictr   )r   rS   rV   rW   rX   r   r   r   __init__Z   s   

zASTSource.__init__c                 C  sh   dd t | j D }t dd | j D }| jj d| j  d| d| }t	|
d S )Nc                 S  s   g | ]\}}|qS r   r   rM   r   r   r   r'   i   r(   z"ASTSource.hash.<locals>.<listcomp>c                 s  s     | ]\}}t ||fV  qd S r   )r*   rM   r   r   r   	<genexpr>l   s    z!ASTSource.hash.<locals>.<genexpr>-r)   )r#   rV   itemsrW   rS   	cache_keyrX   r2   r-   r.   r/   r0   )r   
sorted_sigsorted_constantsr1   r   r   r   r2   h   s   $zASTSource.hashc                 C  s   t | j| |||dS )N)contextoptionscodegen_fns)r   rS   )r   re   rf   rd   r   r   r   make_irp      zASTSource.make_irc                 C  s   t  S r   )r\   r   r   r   r   parse_optionss   s   zASTSource.parse_optionsNNrJ   rK   r3   r4   r5   r]   r2   rg   ri   r   r   r   r   rI   X   s
    rI   c                   @  s,   e Zd Zdd Zdd Zdd Zdd Zd	S )
IRSourcec                 C  s   || _ t|}|jdd  | _| | _tt| j | jtj	}|
d| _|
d}tt| j |}dd t|D | _d S )Nr   r   c                 S  s   i | ]	\}}|t |qS r   )r>   )r%   rN   tyr   r   r   rP      rQ   z%IRSource.__init__.<locals>.<dictcomp>)pathr   suffixrT   	read_textrA   r<   r=   prototype_pattern	MULTILINEr?   rU   rB   arg_type_patternrZ   rV   )r   ro   r@   rV   typesr   r   r   r]   y   s   

zIRSource.__init__c                 C  s   t | jd S )Nr)   )r-   r.   rA   r/   r0   r   r   r   r   r2      r   zIRSource.hashc                 C  s   t | j|}||_|S r   )r   parse_mlir_modulero   rd   )r   re   rf   rd   moduler   r   r   rg      s   zIRSource.make_irc                 C  s   | j dkrdt| jiS t S )Nr9   rG   )rT   rH   rA   r\   r   r   r   r   ri      s   
zIRSource.parse_optionsNrl   r   r   r   r   rm   w   s
    rm   c               
   C  s  dd l } tjtjtjt}g }ttd}|t|	 
 g7 }W d    n1 s0w   Y  tj|ddftj|ddfg}|D ]6\}}| j|g|dD ])}t|j|jjd}|t|	 
 g7 }W d    n1 syw   Y  qUqIt }ttj|dd}	 |	d
}	|	sn||	 qW d    n1 sw   Y  ||
  tj|d}
| |
gD ])}t|j|jjd}|t|	 
 g7 }W d    n1 sw   Y  qt d| S )Nr   rbcompilerztriton.compiler.r   ztriton.backends.)prefixz_C/libtriton.soTi   languager_   )pkgutilosro   dirnameabspath__file__openr-   r.   readr0   joinwalk_packagesmodule_finder	find_specrU   originupdateappenditer_modulesr   )r|   TRITON_PATHcontentsfpath_prefixesro   rz   liblibtriton_hashchunklanguage_pathr   r   r   
triton_key   sD   

r   c                 C  sZ   |dks|dkrt | |}||_|S |dks|dkr!t|  S |dkr+t|  S d S )Nr8   r9   llirr:   cubin)r   rv   rd   r   rq   
read_bytes)	full_namerT   rd   rw   r   r   r   parse   s   r   eBaseExceptionc                   s   | j dur
t| j  | jdurt| j ddg}| j g } dur8t fdd|D s1|   j  dus!t||dd D ]\}}||_qA|sPd| _dS d|d _|d | _dS )	z
    Removes code_generator.py and related files from tracebacks.

    These are uninteresting to the user -- "just show me *my* code!"
    Nz"/triton/compiler/code_generator.pyz/ast.pyc                 3  s$    | ]} j jj|r|V  qd S r   )tb_framef_codeco_filenameendswith)r%   r   tbr   r   r^      s   " z#filter_traceback.<locals>.<genexpr>r   r   )	__cause__filter_traceback__context____traceback__anyr   tb_nextzip)r   	BAD_FILESframes	cur_frame
next_framer   r   r   r      s(   






r   c                  C  s<  |d u r	t j }t|tsJ dt|}t| t }|r+t| ts'J dt| } | 	 }|	t
|p6t
 fi |}t }t  d|   d|  d|  dtt|  	}t|d }t|}	tjdddk}
tjdddk}|
rt|  nd }|rt|  nd }| j d	}|	|pi }||}tjd
ddk}|s|d urtt| }t | ||S ||d|j!|}t
 }|"|| t#|$ %| j&}|r|d7 }t'( }t')| |)| |* }z	| +|||}W n t,y } zt-|  d }~ww tjdddk}t#| |d  D ]]\}}|||}| j d| }|	.||||< |d urI|.|| |d urf|/|rft0d|  |1|}t2|||}|r|dkr|	1|}|3| t0d|  |}q%|	j.tj4|t5d|dd||< |	6|| t | ||S )Nz target must be of GPUTarget typez'source must be either AST or a filepathr_   r)   TRITON_KERNEL_OVERRIDE01TRITON_KERNEL_DUMP.jsonTRITON_ALWAYS_COMPILE)r2   targetr   USE_TTGIR_LOC.z
Overriding kernel with file r9   zCreate new locations for )defaultF)binary)7r   activeget_current_targetrY   r   make_backendrI   r*   rm   ri   r\   r   r   r2   r#   r`   r-   r.   r/   r0   r
   r}   environr    r   r   rU   	get_groupjsonloadsr   rq   CompiledKernelr+   
add_stagesr   keysindexrT   r   rd   load_dialectsget_codegen_implementationrg   	Exceptionr   puthas_fileprintget_filer   create_location_snapshotdumpsvars	put_group) rA   r   re   backend	ir_sourceextra_optionsenv_varsr1   r2   fn_cache_managerenable_overrideenable_ir_dumpfn_override_managerfn_dump_managermetadata_filenamemetadata_groupmetadata_pathalways_compilemetadatastagesfirst_stagerd   rf   rw   r   use_ttgir_locrT   
compile_irnext_moduleir_filenamer   ttgir_full_namer   r   r   compile   s   
:








r   c                   sN    fddt  D }t|dkr!tt| d j d| d|d  S )Nc                   s   g | ]}|j  r|j qS r   )ry   supports_targetr$   r   r   r   r'   2  s    z make_backend.<locals>.<listcomp>r   z! compatible backends for target (z) (z). There should only be one.r   )r   r,   rC   RuntimeErrorr   )r   activesr   r   r   r   1  s   r   c                   @  s&   e Zd Zdd Zd
ddZdd Zd	S )LazyDictc                 C  s   || _ g | _d S r   )r!   extras)r   r!   r   r   r   r]   ;  s   
zLazyDict.__init__rJ   rK   c                 C  s0   | j D ]\}}| j|| B | _q| j   | jS r   )r   r!   clearr   funcargsr   r   r   r    ?  s   
zLazyDict.getc                 C  s   | j ||f d S r   )r   r   r   r   r   r   addE  rh   zLazyDict.addNrk   )r3   r4   r5   r]   r    r   r   r   r   r   r   9  s    
r   c                      sD   e Zd ZdZdZdd Zdd Z fddZdd	 Zd
d Z	  Z
S )r   Nc                   s  ddl m} tdd | D }t| }t|d |d< |d }t|d |d |d	 |d< |d
t	t
| }|di || _t| jj}	|	| j| _|| _|| _| jj| _dd | D }
|	j  fdd|
D | _| j  | _d | _d | _d S )Nr   )
namedtuplec                 s  s&    | ]\}}| d rt|V  qdS )r   Nr   r   r%   cpr   r   r   r^   R  s   $ z*CompiledKernel.__init__.<locals>.<genexpr>cluster_dimsr   r   arch	warp_sizeKernelMetadatac                 S  s"   g | ]\}}| d st|qS )r   r   r   r   r   r   r'   `  s   " z+CompiledKernel.__init__.<locals>.<listcomp>c                   s:   i | ]}|j d d |j d d  kr| n| qS )r   N)rp   r   rq   )r%   file
binary_extr   r   rP   b  s    ,z+CompiledKernel.__init__.<locals>.<dictcomp>r   )collectionsr   nextr`   r   r   rq   tupler   r#   r   r   r   r   r   pack_metadatapacked_metadatarA   r2   rU   r   asmkernelrw   function)r   rA   r   r2   r   r   r   r   r   r   	asm_filesr   r   r   r]   P  s*   


zCompiledKernel.__init__c                 C  s   | j d urd S tj }tj| j| j| _tjj	|d }| jj
|kr-t| jj
|dtjj| j| j| jj
|\| _ | _| _| _d S )Nmax_shared_memzshared memory)rw   r   r   get_current_devicelauncher_clsrA   r   runutilsget_device_propertiessharedr	   load_binaryrU   r  r  n_regsn_spills)r   device
max_sharedr   r   r   _init_handlesm  s   

zCompiledKernel._init_handlesc                   s   |dkr|    t |S )Nr  )r  super__getattribute__)r   rU   	__class__r   r   r  {  s   zCompiledKernel.__getattribute__c           	      G  s   t jd u rd S t| j| j|d}t| jtr| jjj	d u r |S i }d}t
| jjjD ]\}}|| jjjv r?| jj| ||< q+|| ||< |d7 }q+|| jjj	|| j|f |S )N)rU   r  streamr   r   )r   launch_enter_hookr   rU   r  rY   rA   rI   rS   launch_metadatarZ   	arg_names
constexprsrW   r   r   )	r   gridr  r   retarg_dictarg_idxiarg_namer   r   r   r    s   

zCompiledKernel.launch_metadatac                   s       d d fdd
}|S )N)r  c              
     sl   | d u rt j }t j|} j | g|R  }j d  d  d | jj|tj	tj
g	|R   d S )Nr   r   r   )r   r   r  get_current_streamr  r  r  r   r   r  launch_exit_hook)r  r   r  r  r  r   r   r   runner  s   
"z*CompiledKernel.__getitem__.<locals>.runner)r  )r   r  r#  r   r"  r   __getitem__  s   zCompiledKernel.__getitem__)r3   r4   r5   r  r!  r]   r  r  r  r$  __classcell__r   r   r  r   r   I  s    r   )rA   r*   )r   r   rj   )0
__future__r   r-   r   _C.libtritonr   r   r   backends.compilerr    r   runtime.autotunerr	   runtime.cacher
   r   r   runtime.driverr   dataclassesr   code_generatorr   pathlibr   r<   	functoolsr}   r   mlir_prototype_patternptx_prototype_patternrr   mlir_arg_type_patternptx_arg_type_patternrt   r>   rH   rI   rm   	lru_cacher   r   r   r   r   r   r   r   r   r   r   <module>   sT    
	

"

"O