o
    灛i                     @  s@  d dl mZmZ d dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
mZ d dlmZ d dlmZmZmZmZmZmZmZmZmZmZ ddlmZ d dlmZ eded	  Zed
Z G dd dej!Z"d\ddZ#G dd dZ$dd Z%i Z&d]ddZ'G dd dee  Z(dd Z)dd Z*i ddd d!d"d#d$d%d&d!d'd(d)d(d*d#d+d,d-d,d.d/d0d1d2d3d4d5d6d7d8d9d:d;d<d=d>d?d@dAZ+e,e+- D ]Z.e.e+e.< qG dBdC dCe(e  Z/ed^dFdGZ0edddddddHd_dRdGZ0	d`dddddddHdadUdGZ0G dVdW dWZ1G dXdY dYZ2dZd[ Z3dS )b    )annotationsdivisionN)defaultdict)cached_property)
CallableGenericIterableOptionalTypeVarUnionoverloadDictAnyTuple   )driver)
ModuleTypez.runtime.jitTc                      s~   e Zd ZdZd fddZedd Zdd	 Zd
d Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Z  ZS )DependenciesFindera  
    This AST visitor is used to find dependencies of a JITFunction. This can
    be used to invalidate a JITFunction's hash when its source code -- or
    that of its dependencies -- changes.

    This visitor also keeps track of the global variables touched by the
    JITFunction.  When we launch the kernel, we check that these have the same
    values as they did when we ran this visitor.  If not, we raise an error (or
    otherwise we could recompile).
    returnNonec                   sB   t    || _t|d| _|| _h d| _i | _	d| _
d S )Nutf-8>
   intlenmaxminlistfloatprintrangegetattr
isinstanceF)super__init__namehashlibsha256encodehasherglobalssupported_python_builtinsused_global_valsvisiting_arg_default_value)selfr$   r)   src	__class__ T/sda-disk/www/egybert/egybert_env/lib/python3.10/site-packages/triton/runtime/jit.pyr#   $   s   


zDependenciesFinder.__init__c                 C  
   | j  S N)r(   	hexdigestr-   r1   r1   r2   retH      
zDependenciesFinder.retc                 C  s   t |jtjkr|jS |j| jv rd S | j|jd }|d urG| jsGt |t	krGt
|tsGt|ddsG|j| jvrG|| jf| j|jt| jf< |S )N__triton_builtin__F)typectxastStoreidlocal_namesr)   getr,   r   r!   JITFunctionr    r*   r+   )r-   nodevalr1   r1   r2   
visit_NameL   s    
zDependenciesFinder.visit_Namec                   s    fdd|j D S )Nc                   s   g | ]}  |qS r1   )visit).0eltr6   r1   r2   
<listcomp>m       z2DependenciesFinder.visit_Tuple.<locals>.<listcomp>)eltsr-   rB   r1   r6   r2   visit_Tuplej   s   zDependenciesFinder.visit_Tuplec                 C  sX   |  |j}t|tjr|  |j}t|tjs|d u s$t|ddtkr&d S t||jS )N__name__ )rE   valuer!   r<   	Attributer    TRITON_MODULEattr)r-   rB   lhsr1   r1   r2   visit_Attributeo   s   z"DependenciesFinder.visit_Attributec                   s>   fdd}  j}|d u s"||s"t|ts"J d|j dt|ftj  jfdd j	D D ]e}t|ts?q7||rDq7|j
}j |j @ D ].}|\}}j| \}	}|j| \}
}|	|
krtd| d|	 d	j d
|j d|
 dqQj|j tt|dd}|| }j|d q7d S )Nc                   s&   t  jrdS t| dd}|tS )NT
__module__rN   )inspect	isbuiltinfuncr    
startswithrQ   )rX   module)rB   r1   r2   is_triton_builtiny   s   
z8DependenciesFinder.visit_Call.<locals>.is_triton_builtinz
Function "zv" is being called from a Triton function but is not a Triton function itself. Decorate it with @triton.jit to fix thisc                 3  s    | ]	}  |jV  qd S r4   )rE   rO   )rF   kwr6   r1   r2   	<genexpr>   s    z0DependenciesFinder.visit_Call.<locals>.<genexpr>Global variable z has value z when compiling z, but inner kernel z has conflicting value z7 from when it was first compiled.  This is not allowed.noinlineFr   )rE   rX   r!   rA   rM   	itertoolschainmapargskeywords	cache_keyr+   keysRuntimeErrorr$   updatestrr    r(   r'   )r-   rB   r[   rX   objfunc_cache_keykvar_name_v1v2r_   keyr1   )rB   r-   r2   
visit_Callw   s>   
&zDependenciesFinder.visit_Callc                 C  s"   dd |j j D | _| | d S )Nc                 S  s   h | ]}|j qS r1   arg)rF   rt   r1   r1   r2   	<setcomp>       z7DependenciesFinder.visit_FunctionDef.<locals>.<setcomp>)rc   r?   generic_visitrK   r1   r1   r2   visit_FunctionDef   s   z$DependenciesFinder.visit_FunctionDefc                   sn    fdd}t |j|j|jr|jgng |jD ]} | q||j |jd ur0 |j ||j	 d S )Nc                   sB   z j rJ d _ | D ]}|d ur | qW d _ d S d _ w )NTF)r,   rE   )defaultsexprr6   r1   r2   visit_defaults   s   

z:DependenciesFinder.visit_arguments.<locals>.visit_defaults)
r`   ra   posonlyargsrc   vararg
kwonlyargsrE   kw_defaultskwargry   )r-   rB   r{   rt   r1   r6   r2   visit_arguments   s   (


z"DependenciesFinder.visit_argumentsc                 C  s:   |  |}t|tr|  jt|O  _d S | j| d S r4   )rE   r!   r   r?   setadd)r-   rB   targetr1   r1   r2   visitAssnTarget   s   

z"DependenciesFinder.visitAssnTargetc                 C  s4   t |jdkrtd| |jd  | | d S )N   z2Simultaneous multiple assignment is not supported.r   )r   targets	TypeErrorr   rw   rK   r1   r1   r2   visit_Assign   s   zDependenciesFinder.visit_Assignc                 C     |  |j | | d S r4   r   r   rw   rK   r1   r1   r2   visit_AnnAssign      z"DependenciesFinder.visit_AnnAssignc                 C  r   r4   r   rK   r1   r1   r2   	visit_For   r   zDependenciesFinder.visit_For)r   r   )rM   rU   __qualname____doc__r#   propertyr7   rD   rL   rT   rr   rx   r   r   r   r   r   __classcell__r1   r1   r/   r2   r      s    $
- 	r   r   ri   c                 C  s&   t | tr| jS t | tr| S t| S r4   )r!   r:   rM   ri   repr)tyr1   r1   r2   _normalize_ty   s
   

r   c                   @  sn   e Zd ZdZddd	Zed
d Zedd Zedd Zedd Z	edd Z
edd Zedd ZdS )KernelParamzBRepresents a parameter (name plus metadata) to a @jit'ed function.numr   paraminspect.Parameterdo_not_specializeboolc                 C     || _ || _|| _d S r4   )r   _paramr   )r-   r   r   r   r1   r1   r2   r#      s   
zKernelParam.__init__c                 C     | j jS r4   )r   r$   r6   r1   r1   r2   r$        zKernelParam.namec                 C  s(   | j jr| j jtjjkrdS t| j jS )NrN   )r   
annotationrV   	Parameteremptyr   r6   r1   r1   r2   r     s   zKernelParam.annotationc                 C  sZ   | j }dD ]\}}|||t| d  }|r$||v r$| |   S q|dkr+dS dS )N))uintu)r   ir   u1rN   )r   findr   )r-   r   ty1ty2widthr1   r1   r2   annotation_type  s   zKernelParam.annotation_typec                 C  s
   d| j v S )N	constexpr)r   r6   r1   r1   r2   is_constexpr  r8   zKernelParam.is_constexprc                 C  s   d| j v o| j S )Nconst)r   r   r6   r1   r1   r2   is_const  s   zKernelParam.is_constc                 C  r   r4   )r   defaultr6   r1   r1   r2   r     r   zKernelParam.defaultc                 C  s   | j jtjjkS r4   )r   r   rV   r   r   r6   r1   r1   r2   has_default"  s   zKernelParam.has_defaultN)r   r   r   r   r   r   )rM   rU   r   r   r#   r   r$   r   r   r   r   r   r   r   r1   r1   r1   r2   r      s"    







r   c                 C  sH   t | dr|  d dkrdS t| tr"| d dkrdS | dkr"dS dS )Ndata_ptr   r   Dr   1N)hasattrr   r!   r   )vr1   r1   r2   compute_spec_key'  s   
r   Fc                 C  s   | d u rdS t | trdS t | tr(d| kr| dkrdS d| kr&| dkr&dS d	S t | tr/d
S | j|f}t|d }|d u rW|d rDdndtt|d 	dd   }|t|< |S )Nnonei1   i32                u64i64fp32r   *k*r   .)
r!   r   r   r   dtype	dtype2strr@   type_canonicalisation_dictri   split)rt   r   dskresr1   r1   r2   mangle_type7  s$   



*r   c                   @  s    e Zd ZU ded< dddZdS )KernelInterfacer   runr   c                   s    fddS )z
        A JIT function is launched with: fn[grid](*args, **kwargs).
        Hence JITFunction.__getitem__ returns a callable proxy that
        memorizes the grid.
        c                    s   j |  dd|S )NFgridwarmup)r   )rc   kwargsr   r-   r1   r2   <lambda>Y  rI   z-KernelInterface.__getitem__.<locals>.<lambda>r1   )r-   r   r1   r   r2   __getitem__S  s   zKernelInterface.__getitem__N)r   r   )rM   rU   r   __annotations__r   r1   r1   r1   r2   r   P  s   
 r   c           	      C  s@   dd |  D }dd l}| ||| |j|d}||}|S )Nc                 S  s*   i | ]\}}||j jd krt|n|qS r   )r0   rM   ri   rF   rq   rO   r1   r1   r2   
<dictcomp>^  s   * z1serialize_specialization_data.<locals>.<dictcomp>r   )r$   	signature	constantsattrsoptionsrq   )itemsjsonto_dict__dict__dumps)	r$   r   r   r   r   rq   r   rj   serialized_objr1   r1   r2   serialize_specialization_data]  s   
r   c                 C  s  t | jt |ksJ g }g }g }g }g }g }t| j |D ]e\\}}	}
|	jtjju r=|| |d| d|  n|| d|  |d| d|  |
j	r[|| q|| |
j
sj|d|  |
jrv|d|
j  q|d||
jrdndf  qd	d
d || D }d	dd |D }d	dd |D }|d d|}d|}d|||||f }dd | j D }t|d< t|d< t|| |d S )a2  
    Equivalent to sig.bind followed by apply_defaults. This generates a
    native Python function (using exec) which can be memoized on a per-kernel
    basis to avoid having to run these expensive functions -- which constitute
    much of the kernel launch overhead -- every time we run the kernel.
    'z': z	=default_zcompute_spec_key(%s)z"%s"zmangle_type(%s, %s)TrueFalserN   c                 S     g | ]}|d  qS , r1   rF   xr1   r1   r2   rH         z2create_function_from_signature.<locals>.<listcomp>c                 S  r   r   r1   r   r1   r1   r2   rH     r   c                 S  r   r   r1   r   r1   r1   r2   rH     r   z**excess_kwargsr   zFdef dynamic_func(%s):
    return {%s}, (%s), (%s), (%s), excess_kwargsc                 S  s,   i | ]\}}|j tjjurd | |j qS )default_)r   rV   r   r   )rF   r$   r   r1   r1   r2   r     s
    z2create_function_from_signature.<locals>.<dictcomp>r   r   dynamic_func)r   
parameterszipr   r   rV   r   r   appendr   r   r   r   joinr   r   exec)sigkparams	func_argsdict_entriesconstexpr_valsnon_constexpr_valssignature_typesspecialisationsr$   spkpre   args_strdict_str	func_bodyfunc_namespacer1   r1   r2   create_function_from_signatureh  sJ   






r  r   r   
float8e4nvfp8e4nvfloat8e5fp8e5float8e4b15fp8e4b15float8_e4m3fn
float8e4b8fp8e4b8float8_e4m3fnuzfloat8_e5m2float8e5b16fp8e5b16float8_e5m2fnuzfloat16fp16bfloat16bf16float32r   float64fp64int8i8int16i16int32r   r   u8u16u32r   )int64uint8uint16uint32uint64c                      s   e Zd ZdZdZedd Zedd Zdd Zed&d
dZ	dd Z
dd Zdd Zdd Zdd Z		d'ddZedd Zdd Zdd Zdd Zd d! Z fd"d#Zd$d% Z  ZS )(rA   Nr   c                 C  s   t | dr| jS t| trdS t| tr*d| kr| dkrdS d| kr(| dkr(dS d	S t| tr1d
S | d u r7d S tdt|  d|  )Nr   r   r   r   r   r   r   r   r   r   zUnsupported type z for )r   r   r!   r   r   r   r   r:   rs   r1   r1   r2   _key_of  s   



zJITFunction._key_ofc                 C  sD   t | dr|  tj dkS t| tr| d dk| dkfS | d u fS )Nr   r   r   r   r   r   rA   divisibilityr!   r   rs   r1   r1   r2   _spec_of  s
   


zJITFunction._spec_ofc                   sV   ddl m} dd   fddt| j|D }dd t| j|D }|t|t|S )Nr   )AttrsDescriptorc                 S  sD   t | dr|  tj dkS t| tr| tj dkS | d u r dS dS )Nr   r   TFr)  )r   r1   r1   r2   is_divisible_by_16  s   

z3JITFunction._get_config.<locals>.is_divisible_by_16c                   s$   h | ]\}} |r|j s|jqS r1   )r   r   rF   r   rt   r-  r1   r2   ru     s    z*JITFunction._get_config.<locals>.<setcomp>c                 S  s8   h | ]\}}t |trt |ts|d kr|js|jqS )r   )r!   r   r   r   r   r.  r1   r1   r2   ru     s    )compilerr,  r   paramstuple)r-   rc   r,  divisible_by_16
equal_to_1r1   r/  r2   _get_config  s   
	

zJITFunction._get_configFc                 C  sH   | d u rdS t | tr| S t| dd }t| }|rdnd}|| S )N*i8r   r   r   r   )r!   ri   r   r   )rq   r   	dtype_str	const_strr1   r1   r2   _type_of  s   
zJITFunction._type_ofc                 C  s   t t| j|}|S r4   )dictr   
constexprs)r-   constexpr_keyr   r1   r1   r2   _make_constants  s   zJITFunction._make_constantsc                 C  s   t jd u rdS | jj}| jj}ddd t| j|d D }	| d|j d|j	 d|j
 d	|j d
|	 d}
G dd d}t||||d ||}||||j|j	|j
|j|j||d
}t j||
|||| d|i|dddS )NFr   c                 S  s    g | ]\}}|j  d | qS )z: r$   )rF   r   r   r1   r1   r2   rH   "  s     z*JITFunction._call_hook.<locals>.<listcomp>r   z[num_warps=z, num_ctas=z, num_stages=z, enable_fp_fusion=]()c                   @  s   e Zd Zdd ZdS )z/JITFunction._call_hook.<locals>.JitFunctionInfoc                 S  r   r4   )rZ   r$   jit_function)r-   rZ   r$   rA  r1   r1   r2   r#   '  s   z8JITFunction._call_hook.<locals>.JitFunctionInfo.__init__N)rM   rU   r   r#   r1   r1   r1   r2   JitFunctionInfo%  s    rB  r   )
r   devicer   	num_warpsnum_ctas
num_stagesenable_fp_fusionextern_libsconfigsspecialization_datarq   )rq   r   fncompileis_manual_warmupalready_compiled)rA   
cache_hookrK  rM   rU   r   r   r1  rD  rE  rF  rG  r   rH  )r-   rq   r   rC  r   r   rI  r$   rZ   	arg_reprsr   rB  rJ  r   r1   r1   r2   
_call_hook  s6   
	 0

zJITFunction._call_hookc                 C  s   t |sJ | j| dS )z
        Add a hook that will be executed prior to the execution of run
        function with args and kwargs passed into the kernel
        N)callablepre_run_hooksr   )r-   hookr1   r1   r2   add_pre_run_hookE  s   zJITFunction.add_pre_run_hookc                 C  s   ddl m}m}m}m} || _|| _|| _|| _t| j| j| _dd t	| jD | _
dd t	| jD | _dd t	| jD | _dS )z1
        Precompute as much as possible.
        r   )CompiledKernelrL  	ASTSourcemake_backendc                 S  s   g | ]	\}}|j r|qS r1   r   rF   r   pr1   r1   r2   rH   W      z-JITFunction.create_binder.<locals>.<listcomp>c                 S  s   g | ]	\}}|j s|qS r1   rY  rZ  r1   r1   r2   rH   X  r\  c                 S  s    g | ]\}}|j s|js|qS r1   )r   r   rZ  r1   r1   r2   rH   Y  s    
N)r0  rV  rL  rW  rX  r  r   r1  binder	enumerateconstexpr_indicesnon_constexpr_indicesspecialised_indices)r-   rV  rL  rW  rX  r1   r1   r2   create_binderM  s   zJITFunction.create_binderc          &   
     s  t j }t j|}j|d< jD ]	}||i | qjd u r&  j|i |\}}	}
}}d|	t	|
|f }j
| |d }|d u rt j }|}||}d|vsbJ dd|vsjJ dd|vsrJ d|D ]}||jvrtd	| qtt| }fd
djD }|	d t| }dd t||D }j| f  fddt|jD }| D ]\}}t|rtd| dq||||| rd S || d }j|||jd}|j
| |< t }j D ] \\}}\}}||| } |krt d| d| d|  q|sm|d us%J t|r.||}t|}!|d }"|!dkr?|d nd}#|!dkrJ|d nd}$|j!||g|R  }%|j"|"|#|$||j#|j$|%j%j&j%j'g	|R   |S )NdebugrN   device_typez=device_type option is deprecated; current target will be usedrC  z8device option is deprecated; current device will be usedstreamz8stream option is deprecated; current stream will be usedz2Keyword argument %s was specified but unrecognisedc                   s   g | ]} j | jqS r1   )r1  r$   )rF   r   r6   r1   r2   rH         z#JITFunction.run.<locals>.<listcomp>c                 S  s"   i | ]\}}||d krdn|qS )r   r6  r1   )rF   rl   r   r1   r1   r2   r     s   " z#JITFunction.run.<locals>.<dictcomp>c                   s6   i | ]\}}|j s|j d  jv s|du r|j|qS )r   N)r   r   r4  r$   )rF   r   r[  )rI  r1   r2   r     s    zCallable constexpr at index z is not supportedr   )r   r   r^   z1 has changed since we compiled this kernel, from z to r   r   )(r   activeget_current_deviceget_current_streamrc  rS  r]  rb  r   ri   cacher@   get_current_targetrX  parse_optionsr   KeyErrorr2  valuesr`  r   r   r5  r1  r   rR  r   rQ  rW  rL  objectr+   rg   launch_metadatar   functionpacked_metadatarV  launch_enter_hooklaunch_exit_hook)&r-   r   r   rc   r   rC  re  rT  
bound_argssig_and_specr   r   excess_kwargsrq   kernelr   backendr   rl   
bound_valssigkeyssigvalsr   r   r   rt   r.   not_presentr$   globals_dict_idrC   globals_dictnewVal	grid_sizegrid_0grid_1grid_2rp  r1   )rI  r-   r2   r   ]  s   










zJITFunction.runc                   sh  |r|ng } | _  j| _|| _t | _|| _t d | _ fdd| _	|| _
d | _g | _t| jj D ]\}}	|oH||v pH|	j|v }
| jt||	|
 q:tt | _| jtd| jtj d  | _tt| _d | _i | _d | _ t!j"#dddkrdn|| _$|| _%d	d
 | jD | _&dd
 | jD | _'g | _( j)| _) j*| _* j+| _+ j| _d S )Nr   c                   s   d u r j S | S r4   )rM   )rn   rK  r   r1   r2   r     rI   z&JITFunction.__init__.<locals>.<lambda>z^def\s+\w+\s*\(TRITON_DEBUG0r   Tc                 S  s   g | ]}|j qS r1   r>  rF   r[  r1   r1   r2   rH     rv   z(JITFunction.__init__.<locals>.<listcomp>c                 S  s   g | ]}|j r|jqS r1   )r   r   r  r1   r1   r2   rH     rf  ),rK  rU   rZ   versionrV   r   r   getsourcelinesstarting_line_numberr   rp  r]  r1  r^  r   rn  r$   r   r   textwrapdedent	getsourcer.   research	MULTILINEstartr   r:  rj  hashr+   rx  osenvironr@   rc  r_   	arg_namesr;  rS  r   rM   __globals__)r-   rK  r  r   rc  r_   r   rp  r   r   dnsr1   r  r2   r#     s:   "
zJITFunction.__init__c                 C  sX   | j d u r)t| j| j| jd}||   |jt| j	 | _ t
t|j | _| j S )N)r$   r)   r.   )r  r   rM   r  r.   rE   parser7   ri   r  r:  sortedr+   r   )r-   dependencies_finderr1   r1   r2   re     s   
zJITFunction.cache_keyc                O  s   | j ttj||dd|S )NTr   )r   rb   
MockTensor
wrap_dtype)r-   r   rc   r   r1   r1   r2   r     s   zJITFunction.warmupc                   s   ddl m}m}m} dd l}dd lm  tj	 }|
|}|d | jjkr4td|d  d| jj  fdd|d	  D }t|d
  }	|| |	|||d }
dd |d  D }|d }||
d |}|| j| |< |S )Nr   )r,  rL  rW  r   r$   zSpecialization data is for z but trying to preload for c                   s,   i | ]\}}| j |r  |n|qS r1   )r   is_dtyper   tlr1   r2   r     s    z'JITFunction.preload.<locals>.<dictcomp>r   r   r   c                 S  s(   i | ]\}}|t |trt|n|qS r1   )r!   r   r2  r   r1   r1   r2   r     s    r   rq   )r0  r,  rL  rW  r   triton.languagelanguager   rg  rh  loadsrK  rM   rg   r   r:  	from_dictrj  )r-   rJ  r,  rL  rW  r   rC  deserialized_objr   r   r.   r   rq   rx  r1   r  r2   preload  s*   




zJITFunction.preloadc                 C  sH   t | j}t|t jsJ t|jdksJ t|jd t js"J |S )Nr   r   )r<   r  r.   r!   Moduler   bodyFunctionDef)r-   treer1   r1   r2   r    s
   zJITFunction.parsec                 O  s   t d)Nz:Cannot call @triton.jit'd outside of the scope of a kernel)rg   )r-   rc   r   r1   r1   r2   __call__   s   zJITFunction.__call__c                   s(   t t| || |dkrd | _d S d S )Nr.   )r"   rA   __setattr__r  )r-   r$   rO   r/   r1   r2   r  #  s   
zJITFunction.__setattr__c                 C  s   d| j  d| jj dS )NzJITFunction(:r@  )rZ   rK  rM   r6   r1   r1   r2   __repr__*  s   zJITFunction.__repr__F)NNNNNN)rM   rU   r   rO  r*  staticmethodr(  r+  r5  r9  r=  rQ  rU  rb  r   r#   r   re   r   r  r  r  r  r  r   r1   r1   r/   r2   rA     s4    

1Z
:
	rA   rK  JITFunction[T]c                 C     d S r4   r1   )rK  r1   r1   r2   jit3     r  r  r   rp  r   rc  r_   r   Optional[Callable]rp  r   Optional[Iterable[int]]rc  Optional[bool]r_   Callable[[T], JITFunction[T]]c                 C  r  r4   r1   r  r1   r1   r2   r  8  s   
Optional[T]4Union[JITFunction[T], Callable[[T], JITFunction[T]]]c                  s,   d fdd}| dur|| S |S )	a<  
    Decorator for JIT-compiling a function using the Triton compiler.

    :note: When a jit'd function is called, arguments are
        implicitly converted to pointers if they have a :code:`.data_ptr()` method
        and a `.dtype` attribute.

    :note: This function will be compiled and run on the GPU. It will only have access to:

           * python primitives,
           * builtins within the triton package,
           * arguments to this function,
           * other jit'd functions

    :param fn: the function to be jit-compiled
    :type fn: Callable
    rK  r   r   r  c              	     sF   t | sJ tdddkrddlm} || S t|  dS )NTRITON_INTERPRETr  r   r   )InterpretedFunction)r  r   rc  r_   r   rp  )rR  r  getenvinterpreterr  rA   )rK  r  rc  r   rp  r_   r   r  r1   r2   	decoratora  s   zjit.<locals>.decoratorNrK  r   r   r  r1   )rK  r  r   rp  r   rc  r_   r  r1   r  r2   r  E  s   c                   @  s0   e Zd ZdZedd Zdd Zedd ZdS )	r  zr
    Can be used in place of real tensors when calling:
        kernel.warmup(MockTensor(torch.float32), ...)
    c                 C  s"   | j jdkr| jdkrt| S | S )Nr   torch)r0   rM   rU   r  rs   r1   r1   r2   r    s   zMockTensor.wrap_dtypec                 C  s
   || _ d S r4   r   )r-   r   r1   r1   r2   r#        
zMockTensor.__init__c                   C  s   dS )Nr   r1   r1   r1   r1   r2   r     r  zMockTensor.data_ptrN)rM   rU   r   r   r  r  r#   r   r1   r1   r1   r2   r  }  s    
r  c                   @  sN   e Zd Zdd Zdd Zdd Zdd	d
Zdd Zdd Zdd Z	dd Z
dS )TensorWrapperc                 C  s*   || _ || _|j| _|j| _| jj| _d S r4   )r   basedatarC  shape)r-   r  r   r1   r1   r2   r#     s
   zTensorWrapper.__init__c                 C  r3   r4   )r  r   r6   r1   r1   r2   r     r  zTensorWrapper.data_ptrc                 C  s   | j |S r4   )r  stride)r-   r   r1   r1   r2   r    s   zTensorWrapper.strider   ri   c                 C  s   d| j  d| j dS )NzTensorWrapper[r?  r@  )r   r  r6   r1   r1   r2   __str__  s   zTensorWrapper.__str__c                 C  r3   r4   )r  element_sizer6   r1   r1   r2   r    r  zTensorWrapper.element_sizec                 C  s   t | j | jS r4   )r  r  cpur   r6   r1   r1   r2   r       zTensorWrapper.cpuc                 C  s   | j |j  d S r4   )r  copy_)r-   otherr1   r1   r2   r    r  zTensorWrapper.copy_c                 C  s   t | j|| jS r4   )r  r  tor   )r-   rC  r1   r1   r2   r    s   zTensorWrapper.toNr   ri   )rM   rU   r   r#   r   r  r  r  r  r  r  r1   r1   r1   r2   r    s    
r  c                 C  sP   t | tr|| jjkr| jS t| j|S t| drt| |S tdt|  d)Nr   zCannot reinterpret a r   )r!   r  r  r   r   r   r:   )tensorr   r1   r1   r2   reinterpret  s   


r  r  r  r  )r   r  rp  r  r   r  rc  r  r_   r  r   r  r4   )rK  r  r   r  rp  r  r   r  rc  r  r_   r  r   r  )4
__future__r   r   r<   r%   rV   r`   r  r  r  collectionsr   	functoolsr   typingr   r   r   r	   r
   r   r   r   r   r   runtime.driverr   typesr   rM   r   rQ   r   NodeVisitorr   r   r   r   r   r   r   r   r  r   r   rn  r   rA   r  r  r  r  r1   r1   r1   r2   <module>   s    0 
Z.
A	

  o8