o
    h                  	   @  s  d dl mZmZ d dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
mZ d dlmZ d dlmZ d dlmZmZmZmZmZmZmZmZmZmZ d dlmZ d dlmZ d	d
lm Z  d	dl!m"Z" d	dl#m$Z$m%Z%m&Z&m'Z' e(de)d  Z*edZ+G dd dej,Z-dCddZ.G dd dZ/i Z0g Z1dd Z2dDddZ3G dd dee+ Z4dd Z5d d! Z6d"d# Z7eG d$d% d%Z8G d&d' d'e4e+ Z9edEd*d+Z:edddddddd,dFd7d+Z:	dGdddddddd,dHd:d+Z:G d;d< d<Z;G d=d> d>Z<d?d@ Z=dAdB Z>dS )I    )annotationsdivisionN)defaultdict)	dataclass)cached_property)
CallableGenericIterableOptionalTypeVarUnionoverloadDictAnyTupleTensorDescriptor)
ModuleType   )knobs)driver)find_paths_ifget_iterable_pathtype_canonicalisation_dictcanonicalize_dtypez.runtime.jitTc                      s   e Zd ZdZd fddZedd Zdd	 Zd
d Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Z  ZS )DependenciesFindera  
    This AST visitor is used to find dependencies of a JITFunction. This can
    be used to invalidate a JITFunction's hash when its source code -- or
    that of its dependencies -- changes.

    This visitor also keeps track of the global variables touched by the
    JITFunction.  When we launch the kernel, we check that these have the same
    values as they did when we ran this visitor.  If not, we raise an error (or
    otherwise we could recompile).
    returnNonec                   sH   t    || _t|d| _|| _|| _h d| _	i | _
d| _d S )Nutf-8>
   intlenmaxminlistfloatprintrangegetattr
isinstanceF)super__init__namehashlibsha256encodehasherglobals	nonlocalssupported_python_builtinsused_global_valsvisiting_arg_default_value)selfr,   r1   r2   src	__class__ M/var/www/html/scripts/venv/lib/python3.10/site-packages/triton/runtime/jit.pyr+   )   s   


zDependenciesFinder.__init__c                 C  
   | j  S N)r0   	hexdigestr6   r:   r:   r;   retN      
zDependenciesFinder.retc                 C  s&   t |jrdS t|dd}|tS )NT
__module__ )inspect	isbuiltinfuncr(   
startswithTRITON_MODULE)r6   noderF   moduler:   r:   r;   _is_triton_builtinR   s   
z%DependenciesFinder._is_triton_builtinc                 C  s   t |tr]| j |j @ D ].}|\}}| j| \}}|j| \}}||kr=td| d| d| j d|j d| dq| j|j |j}|t	t
|dd7 }| j|d	 d S d S )
NGlobal variable z has value z when compiling z, but inner kernel z has conflicting value z7 from when it was first compiled.  This is not allowed.noinlineFr   )r)   JITFunctionr4   keysRuntimeErrorr,   __name__update	cache_keystrr(   r0   r/   )r6   rF   kvar_name_v1v2func_keyr:   r:   r;   _update_hashX   s   
&zDependenciesFinder._update_hashc                   s   t |jtju r|jS |j jv rd S  fdd}||j\}}|d urM jsMt |turMt|t	sMt
|ddsM|j jvrMt||f j|jt|f<  | |S )Nc                   sD    j | d }|d ur| j fS  j| d }|d ur | jfS dS )N)NN)r1   getr2   )r,   valr?   r:   r;   name_lookupr   s   

z2DependenciesFinder.visit_Name.<locals>.name_lookup__triton_builtin__F)typectxastStoreidlocal_namesr5   r   r)   rN   r(   r3   copyr4   r[   )r6   rI   r^   r]   var_dictr:   r?   r;   
visit_Namej   s$   	

zDependenciesFinder.visit_Namec                   s    fdd|j D S )Nc                   s   g | ]}  |qS r:   )visit).0eltr?   r:   r;   
<listcomp>       z2DependenciesFinder.visit_Tuple.<locals>.<listcomp>)eltsr6   rI   r:   r?   r;   visit_Tuple   s   zDependenciesFinder.visit_Tuplec                 C  sf   |  |j}t|tjr|  |j}t|tjs|d u s$t|ddtkr&d S t||j}| | |S )NrQ   rC   )	ri   valuer)   rb   	Attributer(   rH   attrr[   )r6   rI   lhsr@   r:   r:   r;   visit_Attribute   s   
z"DependenciesFinder.visit_Attributec                 C  s"   dd |j j D | _| | d S )Nc                 S  s   h | ]}|j qS r:   arg)rj   rw   r:   r:   r;   	<setcomp>       z7DependenciesFinder.visit_FunctionDef.<locals>.<setcomp>)argsre   generic_visitro   r:   r:   r;   visit_FunctionDef   s   z$DependenciesFinder.visit_FunctionDefc                   sn    fdd}t |j|j|jr|jgng |jD ]} | q||j |jd ur0 |j ||j	 d S )Nc                   sB   z j rJ d _ | D ]}|d ur | qW d _ d S d _ w )NTF)r5   ri   )defaultsexprr?   r:   r;   visit_defaults   s   

z:DependenciesFinder.visit_arguments.<locals>.visit_defaults)
	itertoolschainposonlyargsrz   vararg
kwonlyargsri   kw_defaultskwargr}   )r6   rI   r   rw   r:   r?   r;   visit_arguments   s   (


z"DependenciesFinder.visit_argumentsc                 C  s:   |  |}t|tr|  jt|O  _d S | j| d S r=   )ri   r)   r$   re   setadd)r6   rI   targetr:   r:   r;   visitAssnTarget   s   

z"DependenciesFinder.visitAssnTargetc                 C  s4   t |jdkrtd| |jd  | | d S )N   z2Simultaneous multiple assignment is not supported.r   )r!   targets	TypeErrorr   r{   ro   r:   r:   r;   visit_Assign   s   zDependenciesFinder.visit_Assignc                 C     |  |j | | d S r=   r   r   r{   ro   r:   r:   r;   visit_AnnAssign      z"DependenciesFinder.visit_AnnAssignc                 C  r   r=   r   ro   r:   r:   r;   	visit_For   r   zDependenciesFinder.visit_For)r   r   )rQ   rB   __qualname____doc__r+   propertyr@   rK   r[   rh   rp   ru   r|   r   r   r   r   r   __classcell__r:   r:   r8   r;   r      s     %
'
 	r   r   rT   c                 C  s  dd l m  m} t| trZ|  } | dr/| d} t| } | ds'J d| dd   S | 	dr>dt| d d  S | drMdt| dd   S | drYt| dS n%t| |j
rhdt| j S t| |jrr| j} nt| tr{| j} nt| } t| d	d
| S )Nr   zconst const**kr   ztl._trC   )triton.language.corelanguagecorer)   rT   striprG   removeprefix_normalize_tyendswithpointer_type
element_tydtyper,   r`   rQ   r   r\   replace)tyr   r:   r:   r;   r      s.   






r   c                   @  sr   e Zd ZdZdd	d
Zedd ZedddZedddZedd Z	edd Z
edd Zedd ZdS )KernelParamzBRepresents a parameter (name plus metadata) to a @jit'ed function.numr    paraminspect.Parameterdo_not_specializebooldo_not_specialize_on_alignmentc                 C  s   || _ || _|| _|| _d S r=   )r   _paramr   r   )r6   r   r   r   r   r:   r:   r;   r+   
  s   
zKernelParam.__init__c                 C     | j jS r=   )r   r,   r?   r:   r:   r;   r,        zKernelParam.namer   rT   c                 C  s(   | j jr| j jtjjkrdS t| j jS )NrC   )r   
annotationrD   	Parameteremptyr   r?   r:   r:   r;   r     s   zKernelParam.annotationc                 C  sN   | j }|dr|dd  }n|dr|dd  }|tt v r%| j S dS )Nr   r   r   r   rC   )r   rG   r   r   values)r6   ar:   r:   r;   annotation_type  s   

zKernelParam.annotation_typec                 C  s
   d| j v S N	constexpr)r   r?   r:   r:   r;   is_constexpr&  rA   zKernelParam.is_constexprc                 C  s    | j rdS d| jv p| jdS )NFr   r   )r   r   rG   r?   r:   r:   r;   is_const*  s   zKernelParam.is_constc                 C  r   r=   )r   defaultr?   r:   r:   r;   r   0  r   zKernelParam.defaultc                 C  s   | j jtjjkS r=   )r   r   rD   r   r   r?   r:   r:   r;   has_default4  s   zKernelParam.has_defaultN)r   r    r   r   r   r   r   r   r   rT   )rQ   rB   r   r   r+   r   r,   r   r   r   r   r   r   r   r:   r:   r:   r;   r     s"    





r   c                   s0   ddl m ddlm  d	 fdd	S )
Nr   r   r   r   FTc                   s   d u rdS t  trdS t  trA|r d|dnd } dkr%|r%dS d kr1 dkr1d	|fS d
 kr= dkr=d|fS d|fS t  trHdS t dr} j|f}t|d }|d u rn|d rbdndt|d  }|t|< |rw d|dnd }||fS t  t	rd j
fS t  rd fS t drdS t  trfdd D } fdd}|dd |D }	|dd |D }
|	|
fS t  trt jdsJ t jj}d| t j dd fS t  rt jdsJ t jj}d| t j d jdd fS td t  )!N)r   N)u1Nr    )alignr   )r   r   i   ii32l            l    u64i64)fp32Ndata_ptrr   r   r   tensorr   tma_desc_cpu_ptr)	nvTmaDescNc                   s   g | ]} |qS r:   r:   rj   x)specialize_implr:   r;   rl   c      zCcreate_specialize_impl.<locals>.specialize_impl.<locals>.<listcomp>c                   s   t  drt |  S t| S )N_fields)hasattrr`   tuple)valsrv   r:   r;   <lambda>d      zAcreate_specialize_impl.<locals>.specialize_impl.<locals>.<lambda>c                 S     g | ]}|d  qS r   r:   r   r:   r:   r;   rl   e  r   c                 S  r   r   r:   r   r:   r:   r;   rl   f  r   ztensordesc<>,zUnsupported type: %s)r)   r   r    r%   r   r   	dtype2strr\   r   rN   rS   r   r   baser$   block_shapelayoutr   r`   )rw   r   specialize_valuer   keydskresspec
make_tupletysrO   innerGluonTensorDescriptorr   specialize_extrar   rv   r;   r   B  sX   










"z/create_specialize_impl.<locals>.specialize_impl)FTT)r   r   'triton.experimental.gluon.nvidia.hopperr   )r   r:   r   r;   create_specialize_impl=  s   1r   Fc                 C  s6   t tdkrttdd  td }|| |dd S )Nr   c                 [     d S r=   r:   )rW   kwargsr:   r:   r;   r   x  s    zmangle_type.<locals>.<lambda>)r   )r!   specialize_impl_cacheappendr   )rw   
specializer   r:   r:   r;   mangle_typev  s   r   c                   @  s    e Zd ZU ded< dddZdS )KernelInterfacer   runr   c                   s    fddS )z
        A JIT function is launched with: fn[grid](*args, **kwargs).
        Hence JITFunction.__getitem__ returns a callable proxy that
        memorizes the grid.
        c                    s   j |  dd|S )NFgridwarmup)r   )rz   r   r   r6   r:   r;   r     rm   z-KernelInterface.__getitem__.<locals>.<lambda>r:   )r6   r   r:   r   r;   __getitem__  s   zKernelInterface.__getitem__N)r   r   )rQ   rB   r   __annotations__r   r:   r:   r:   r;   r   }  s   
 r   c           	   	   C  sl   dd |  D }dd l}| |dd | D t| dd | D t| |j|d}||}|S )Nc                 S  s*   i | ]\}}||j jd krt|n|qS r   )r9   rQ   rT   rj   r   rq   r:   r:   r;   
<dictcomp>  s   * z1serialize_specialization_data.<locals>.<dictcomp>r   c                 S     g | ]}t |qS r:   r$   r   r:   r:   r;   rl     r   z1serialize_specialization_data.<locals>.<listcomp>c                 S  r   r:   r   r   r:   r:   r;   rl     r   )r,   	signatureconstant_keysconstant_vals
attrs_keys
attrs_valsoptionsr   )itemsjsonrO   r$   r   __dict__dumps)	r,   r   	constantsattrsr  r   r  objserialized_objr:   r:   r;   serialize_specialization_data  s   $
r  c              
   C  s  t | jt |ksJ g }t| j |D ]o\}}|jr&|d| d q|jr+dnd}|jr2dnd}|jr9dnd}d| d| d| d| d	}	|j	r~t
|j	trc|j	dksa|j	dd	 d
v rcd}|rs|d|j	 d|	 d q|d|j	 d q||	  qdd }
ddtt|
| j dg  dddd | j D  dd| d}dd | j D }t|d< t|j|d< t|| |d S )a2  
    Equivalent to sig.bind followed by apply_defaults. This generates a
    native Python function (using exec) which can be memoized on a per-kernel
    basis to avoid having to run these expensive functions -- which constitute
    much of the kernel launch overhead -- every time we run the kernel.
    z("constexpr", )TrueFalsezspecialize_impl(, r   Nr   )fpbfFz("z",) + z[1:]z", None)c                 S  s0   | d j tjju r| d S | d  d| d  S )Nr   r   z	=default_r   rD   r   r   )r   r:   r:   r;   r     s   0 z0create_function_from_signature.<locals>.<lambda>z
def dynamic_func(z	**optionsz):
    params = {c                 S  s   g | ]
}d | d| qS )'z': r:   )rj   r,   r:   r:   r;   rl     s    z2create_function_from_signature.<locals>.<listcomp>z}
    specialization = [r   z-]
    return params, specialization, options
c                 S  s,   i | ]\}}|j tjjurd | |j qS )default_r  )rj   r,   r   r:   r:   r;   r     s
    z2create_function_from_signature.<locals>.<dictcomp>rN   r   dynamic_func)r!   
parametersziprO   r   r   r   r   r   r   r)   rT   joinr$   mapr  rN   r   get_arg_specializationexec)sigkparamsbackendspecializationr,   kpr   r   r   r@   rw   	func_bodyfunc_namespacer:   r:   r;   create_function_from_signature  s@   
r%  c                 C  s   | j  d| j S )N.)rB   r   fnr:   r:   r;   get_full_name     r)  c                   @  s&   e Zd ZU ded< ded< ded< dS )JitFunctionInfor   rJ   rT   r,   rN   jit_functionN)rQ   rB   r   r   r:   r:   r:   r;   r+    s   
 r+  c                      s   e Zd Zdd Zd&ddZdd Zd	d
 Zdd Zdd Z		d'ddZ	dd Z
edd Zedd Zdd Zdd Zdd Zdd Z fd d!Z fd"d#Zd$d% Z  ZS )(rN   c                 C     dS )NFr:   r?   r:   r:   r;   is_gluon  s   zJITFunction.is_gluonr   bool | Nonec	                 C  s   |sd S | j j}	| j j}
ddd t| j|d D }|	 d|j d|j d|j d|j	 d	|j
 d
| d}t| j }t||||d ||}||||j|j|j|j	|j
|j|||d}|||t|
|	| d|i||ddS )Nr  c                 S  s    g | ]\}}|j  d | qS )z: r,   )rj   r   r   r:   r:   r;   rl          z*JITFunction._call_hook.<locals>.<listcomp>r   z[num_warps=z, num_ctas=z, num_stages=z, enable_fp_fusion=z, launch_cooperative_grid=](r  r   )r   devicer	  	num_warpsnum_ctas
num_stagesenable_fp_fusionlaunch_cooperative_gridextern_libsconfigsspecialization_data	is_warmupr   F)r   reprr(  compileis_manual_warmupalready_compiled)r(  r   rB   r  r  paramsr4  r5  r6  r7  r8  r)  r  r9  r+  )r6   hookr   r   r3  r	  r  r:  r<  r,   rJ   	arg_reprsr=  	full_namer;  r   r:   r:   r;   
_call_hook  s:    8


zJITFunction._call_hookc                 C  s   t |sJ | j| dS )z
        Add a hook that will be executed prior to the execution of run
        function with args and kwargs passed into the kernel
        N)callablepre_run_hooksr   )r6   rB  r:   r:   r;   add_pre_run_hook  s   zJITFunction.add_pre_run_hookc                 C  sX   ddl m}m}m}m} tj }||}|| _|| _|| _t| j	| j
|}i |||fS )z1
        Precompute as much as possible.
        r   )CompiledKernelr>  	ASTSourcemake_backend)compilerrI  r>  rJ  rK  r   activeget_current_targetr%  r   rA  )r6   rI  r>  rJ  rK  r   r   binderr:   r:   r;   create_binder  s   
zJITFunction.create_binderc          !   
     s  | d| jp
tjj|d< tj }tj|}| jD ]	}||i | q| j	| \}}	}
|
|i |\}}t
|t
| }| |d }|d u r|}dd | jD }dd |D }dd t||D }d|vspJ dd	|vsxJ d
d|vsJ d|D ]}||jvr||vrtd| qt|dd }fdd|D }dd |D  t dd } fdd|D }| tjj||||||g|rd S | | |||}| j||	|jd}|||< | tjj||||||g| t }| j D ] \\}}\}}| || }|krtd| d| d| q|sp|d us$J t|r-|}t|}|d }|dkr>|d nd}|dkrI|d nd}|j||g R  } |j|||||j|j | tjj!tjj"g	 R   |S )Ndebugc                 S     g | ]}|j qS r:   r0  r   r:   r:   r;   rl   <  ry   z#JITFunction.run.<locals>.<listcomp>c                 S  r   r   r:   r   r:   r:   r;   rl   =  r   c                 S  s   i | ]\}}||qS r:   r:   )rj   rU   vr:   r:   r;   r   >  rm   z#JITFunction.run.<locals>.<dictcomp>device_typez=device_type option is deprecated; current target will be usedr3  z8device option is deprecated; current device will be usedstreamz8stream option is deprecated; current stream will be usedz2Keyword argument %s was specified but unrecognisedc                 S  s   |dkS r   r:   )rW   r]   r:   r:   r;   r   G  s    z!JITFunction.run.<locals>.<lambda>c                   s    i | ]}|t t  |qS r:   )r   r$   r   )rj   path)
bound_argsr:   r;   r   H  r1  c                 S  r   r   r:   r   r:   r:   r;   rl   J  r   c                 S  s
   t |tS r=   )r)   rT   )rW   r   r:   r:   r;   r   K  s   
 c                   s   i | ]}| t |qS r:   )
parse_attrr   )rj   rU   )attrvalsr   r:   r;   r   L  r   )r   r  rL   z1 has changed since we compiled this kernel, from z to r   r   r   )#r\   rQ  r   runtimer   rM  get_current_deviceget_current_streamrG  device_cachesrT   parse_optionsrA  r  r  KeyErrorr   rE  jit_cache_hookrJ  r>  jit_post_compile_hookobjectr4   r  rP   rF  r!   launch_metadatar   r   functionpacked_metadatalaunch_enter_hooklaunch_exit_hook)!r6   r   r   rz   r   r3  rU  rB  kernel_cacher   rO  r!  r  r   kernelsigkeyssigvalsr   rU   
constexprsr
  r7   not_presentr,   rW   r]   globals_dictnewVal	grid_sizegrid_0grid_1grid_2rc  r:   )rY  r   rW  r;   r   #  st   



zJITFunction.runc                 C  s   | j d u r| jS |  |S r=   )_repr_fn_name)r6   rW   r:   r:   r;   r=  m  s   zJITFunction.reprNc	                 C  sz  |r|ng }|r
|ng }|| _ |j| _|| _t|| _|| _|| _t|d | _	|| _
t|| _|| _g | _t| jj D ]!\}	}
|	|v pL|
j|v }|	|v pU|
j|v }| jt|	|
|| q@tt|}|td|tj d  }| | t| j| _d | _ i | _!d | _"|| _#|| _$dd | jD | _%dd | jD | _&g | _'|j(| _(|j)| _)|j*| _*|j+| _+|j| _d S )Nr   z^def\s+\w+\s*\(c                 S  rR  r:   r0  rj   pr:   r:   r;   rl     ry   z(JITFunction.__init__.<locals>.<listcomp>c                 S  s   g | ]}|j r|jqS r:   )r   r   rv  r:   r:   r;   rl     s    ),r(  rB   rJ   versionrD   r   r   r   getsourcelinesstarting_line_numberrt  r)  ru  rc  rA  	enumerater  r   r,   r   r   textwrapdedent	getsourceresearch	MULTILINEstart_unsafe_update_srcr   rP  r]  hashr4   ri  rQ  rM   	arg_namesrl  rG  r   rQ   r   __globals__)r6   r(  rx  r   r   rQ  rM   r=  rc  ir   dnsdns_oar7   r:   r:   r;   r+   p  sD   

zJITFunction.__init__c                 C  s   | j t| jjB S r=   )r  rD   getclosurevarsr(  r2   r?   r:   r:   r;   get_capture_scope     zJITFunction.get_capture_scopec                 C  sh   | j d u r1t| jj}t| j| j|| jd}|	| 
  |jt| j | _ tt|j | _| j S )N)r,   r1   r2   r7   )r  rD   r  r(  r2   r   ru  r  r7   ri   parser@   rT   rz  dictsortedr4   r  )r6   r2   dependencies_finderr:   r:   r;   rS     s   
zJITFunction.cache_keyc                 C  s   ddl m} |S )Nr   r   )r   r   )r6   r   r:   r:   r;   r`     s   zJITFunction.typec                O  s   | j ttj||dd|S )NTr   )r   r  
MockTensor
wrap_dtype)r6   r   rz   r   r:   r:   r;   r     s   zJITFunction.warmupc                   s  ddl m}m} dd l}dd lm  tj }|	|}|d | j
kr0td|d  d| j
 tt|d }|d } fd	d
t||D }	tt|d }
|d }tt|
|}t|d  }|| ||	|}dd
 |d  D }|d }||d |}|| j| d |< |S )Nr   )r>  rJ  r   r,   zSpecialization data is for z but trying to preload for r   r  c                   s,   i | ]\}}| j |r  |n|qS r:   )r   is_dtyper   tlr:   r;   r     s    z'JITFunction.preload.<locals>.<dictcomp>r  r  r   c                 S  s(   i | ]\}}|t |trt|n|qS r:   )r)   r$   r   r   r:   r:   r;   r     s    r  r   )rL  r>  rJ  r  triton.languager   r   rM  r[  loadsru  rP   r  r   r  r  r  r]  )r6   r;  r>  rJ  r  r3  deserialized_objr   r  r	  r  r  r
  r   r7   r  r   ri  r:   r  r;   preload  s4   



zJITFunction.preloadc                 C  sH   t | j}t|t jsJ t|jdksJ t|jd t js"J |S )Nr   r   )rb   r  r7   r)   Moduler!   bodyFunctionDef)r6   treer:   r:   r;   r    s
   zJITFunction.parsec                 O  s   t d)Nz:Cannot call @triton.jit'd outside of the scope of a kernel)rP   )r6   rz   r   r:   r:   r;   __call__  s   zJITFunction.__call__c                   s.   |dkrt d| dtt| || d S )Nr7   zCannot set attribute 'zX' directly. Use '_unsafe_update_src()' and manually clear `.hash` of all callersinstead.)AttributeErrorr*   rN   __setattr__)r6   r,   rq   r8   r:   r;   r    s   zJITFunction.__setattr__c                   s   d| _ t d| dS )z
        The only method allowed to modify src.
        Bypasses the __setattr__ restriction by calling super().__setattr__ directly.
        Nr7   )r  r*   r  )r6   new_srcr8   r:   r;   r    s   zJITFunction._unsafe_update_srcc                 C  s   d| j  d| jj dS )NzJITFunction(:r  )rJ   r(  r   r?   r:   r:   r;   __repr__  s   zJITFunction.__repr__)r   r/  )NNNNNNN)rQ   rB   r   r.  rE  rH  rP  r   r=  r+   r  r   rS   r`   r   r  r  r  r  r  r  r   r:   r:   r8   r;   rN     s,    
.J
>

 rN   r(  JITFunction[T]c                 C  r   r=   r:   r'  r:   r:   r;   jit     r  rx  r=  rc  r   r   rQ  rM   r=  Optional[Callable]rc  r   Optional[Iterable[int | str]]r   rQ  Optional[bool]rM   Callable[[T], JITFunction[T]]c                 C  r   r=   r:   r  r:   r:   r;   r    s   Optional[T]4Union[JITFunction[T], Callable[[T], JITFunction[T]]]c          	        s.   d fdd}| dur|| S |S )	a<  
    Decorator for JIT-compiling a function using the Triton compiler.

    :note: When a jit'd function is called, arguments are
        implicitly converted to pointers if they have a :code:`.data_ptr()` method
        and a `.dtype` attribute.

    :note: This function will be compiled and run on the GPU. It will only have access to:

           * python primitives,
           * builtins within the triton package,
           * arguments to this function,
           * other jit'd functions

    :param fn: the function to be jit-compiled
    :type fn: Callable
    r(  r   r   r  c              
     sP   t | sJ tjjrddlm} ||  dS t|  dS )Nr   )InterpretedFunction)rx  r   r   rQ  rM   r=  rc  )rF  r   rZ  	interpretinterpreterr  rN   )r(  r  rQ  r   r   rc  rM   r=  rx  r:   r;   	decorator8  s"   zjit.<locals>.decoratorNr(  r   r   r  r:   )	r(  rx  r=  rc  r   r   rQ  rM   r  r:   r  r;   r    s   c                   @  s<   e Zd ZdZedd Zdd Zedd Zedd	 Zd
S )r  zr
    Can be used in place of real tensors when calling:
        kernel.warmup(MockTensor(torch.float32), ...)
    c                 C  s"   | j jdkr| jdkrt| S | S )Nr   torch)r9   rQ   rB   r  rv   r:   r:   r;   r  ]  s   zMockTensor.wrap_dtypec                 C  s
   || _ d S r=   r   )r6   r   r:   r:   r;   r+   c     
zMockTensor.__init__c                   C  r-  Nr   r:   r:   r:   r:   r;   r   f  r  zMockTensor.data_ptrc                   C  r-  r  r:   r:   r:   r:   r;   	ptr_rangej  r  zMockTensor.ptr_rangeN)	rQ   rB   r   r   staticmethodr  r+   r   r  r:   r:   r:   r;   r  W  s    

r  c                   @  s^   e Zd Zdd Zdd Zdd Zdd	d
Zdd Zdd Zdd Z	dd Z
dd Zdd ZdS )TensorWrapperc                 C  s*   || _ || _|j| _|j| _| jj| _d S r=   )r   r   datar3  shape)r6   r   r   r:   r:   r;   r+   q  s
   zTensorWrapper.__init__c                 C  r<   r=   )r   r   r?   r:   r:   r;   r   x  r  zTensorWrapper.data_ptrc                 G  s   | j j| S r=   )r   stride)r6   rz   r:   r:   r;   r  {  s   zTensorWrapper.strider   rT   c                 C  s   d| j  d| j dS )NzTensorWrapper[r2  r  )r   r   r?   r:   r:   r;   __str__~  s   zTensorWrapper.__str__c                 C  r<   r=   )r   element_sizer?   r:   r:   r;   r    r  zTensorWrapper.element_sizec                 C     t | j | jS r=   )r  r   cpur   r?   r:   r:   r;   r    r*  zTensorWrapper.cpuc                 C  s   | j |j  d S r=   )r   copy_)r6   otherr:   r:   r;   r    r*  zTensorWrapper.copy_c                 C  r  r=   )r  r   cloner   r?   r:   r:   r;   r    r*  zTensorWrapper.clonec                 C     t | j|| jS r=   )r  r   tor   )r6   r3  r:   r:   r;   r    r  zTensorWrapper.toc                 C  r  r=   )r  r   	new_emptyr   )r6   sizesr:   r:   r;   r    r  zTensorWrapper.new_emptyNr   )rQ   rB   r   r+   r   r  r  r  r  r  r  r  r  r:   r:   r:   r;   r  o  s    
r  c                 C  sP   t | tr|| jjkr| jS t| j|S t| drt| |S tdt|  d)Nr   zCannot reinterpret a r&  )r)   r  r   r   r   r   r`   )r   r   r:   r:   r;   reinterpret  s   


r  c                 C  sr   | }t |ts|j}t |tr|jjj}t|j\}}t|D ]\}}| 	dr4||7 } ||fS q ||fS )Nzdef )
r)   rN   r(  __code__co_filenamerD   ry  r{  r   rG   )r(  base_fn	file_namelines
begin_lineidxliner:   r:   r;   get_jit_fn_file_line  s   


r  r   )Fr  )r=  r  rc  r  r   r  r   r  rQ  r  rM   r  r   r  r=   )r(  r  r=  r  rc  r  r   r  r   r  rQ  r  rM   r  r   r  )?
__future__r   r   rb   rf   r-   rD   r   r  r|  collectionsr   dataclassesr   	functoolsr   typingr   r   r	   r
   r   r   r   r   r   r   triton.tools.tensor_descriptorr   typesr   rC   r   runtime.driverr   _utilsr   r   r   r   rQ   r!   rH   r   NodeVisitorr   r   r   r   r   r   r   r   r  r%  r)  r+  rN   r  r  r  r  r  r:   r:   r:   r;   <module>   sz    0 
Q2
9:  /<%