o
    hT                     @  s  d dl mZ d dlZd dlZddlmZmZ ddlmZ ddlm	Z	 ddlm
Z
mZ ddlmZmZ dd	lmZ dd
lmZmZmZ ddlmZ ddlmZ d dlmZ d dlZd dlZd dlZd dlZd dlZdZ de iZ!dZ"de"iZ#dd Z$G dd dZ%G dd dZ&e' dd Z(e' dd Z)dd Z*d1dd Z+G d!d" d"Z,d2d#d$Z-d3d)d*Z.G d+d, d,Z/G d-d. d.e0Z1G d/d0 d0Z2dS )4    )annotationsN   )get_cache_invalidating_env_varsir)backends)Language)BaseBackend	GPUTarget)__version__knobs)OutOfResources)get_cache_managerget_dump_managerget_override_manager)driver)get_sass)Pathz=\.(?:visible|extern)\s+\.(?:entry|func)\s+(\w+)\s*\(([^)]*)\)ptxz\.param\s+\.(\w+)c                 C  sP   t d| }t d| }|d urdS t dd| } |d ur&dt|d S | S )Nz!tt\.ptr<([^,]+)ztt.nv_tma_desc = 1	nvTmaDescz {[^}]+} *   )researchsubconvert_type_reprgroup)xmatchtma r    S/var/www/html/scripts/venv/lib/python3.10/site-packages/triton/compiler/compiler.pyr   '   s   r   c                   @  s0   e Zd ZddddZdd Zdd	 Zd
d ZdS )	ASTSourceNreturnNonec                 C  s   || _ tj| _d| _|j| _|| _t | _	|d ur<|
 D ]\}}t|tr-|j|fn|}t|ts6J || j	|< q|p@t | _t| jtrXdd t| jdD | _d S | j D ]}t|tshtdq]d S )Nttirc                 S  s   i | ]	\}}||  qS r    )strip.0kvr    r    r!   
<dictcomp>D       z&ASTSource.__init__.<locals>.<dictcomp>,zSignature keys must be string)fnr   TRITONlanguageext__name__name	signaturedict	constantsitems
isinstancestr	arg_namesindextupleattrs	enumeratesplitkeys	TypeError)selfr.   r4   
constexprsr=   r)   r*   r    r    r!   __init__6   s&    
zASTSource.__init__c                   sz   dd t | j D }dd  d fddt | j D }| jj dt| j d| d| }t	
|d S )Nc                 S  s   g | ]\}}|qS r    r    r'   r    r    r!   
<listcomp>K   s    z"ASTSource.hash.<locals>.<listcomp>c                 S  s   t | dr| jS t| S )N	cache_key)hasattrrF   r9   )r   r    r    r!   <lambda>L       z ASTSource.hash.<locals>.<lambda>-c                   s   g | ]\}} |qS r    r    r'   get_keyr    r!   rE   M   rI   utf-8)sortedr4   r7   joinr6   r.   rF   r9   r=   hashlibsha256encode	hexdigest)rB   
sorted_sigconstants_keykeyr    rK   r!   hashJ   s
   "$zASTSource.hashc                 C  s"   ddl m} || j| ||||dS )Nr   )ast_to_ttir)contextoptionscodegen_fns
module_map)code_generatorrX   r.   )rB   rZ   r[   r\   rY   rX   r    r    r!   make_irQ   s   zASTSource.make_irc                 C  s   t  S N)r5   rB   r    r    r!   parse_optionsV   s   zASTSource.parse_optionsNNr#   r$   r2   
__module____qualname__rD   rW   r^   ra   r    r    r    r!   r"   4   s
    r"   c                   @  s,   e Zd Zdd Zdd Zdd Zdd Zd	S )
IRSourcec           
      C  s   || _ t|}|jdd  | _tj| _| | _t	
| |
| | jdkrStt| j | jtj}|d| _|d}tt| j |}dd t|D | _d S t	| j || _| j }d| | _| j|}| j|}	dd t|	D | _d S )Nr   r   r   c                 S  s   i | ]	\}}|t |qS r    )r   r(   r)   tyr    r    r!   r+   l   r,   z%IRSource.__init__.<locals>.<dictcomp>@c                 S  s   i | ]\}}||qS r    r    rh   r    r    r!   r+   s   s    )pathr   suffixr1   r   r/   r0   	read_textsrcr   load_dialectsr   r   prototype_pattern	MULTILINEr   r3   findallarg_type_patternr>   r4   parse_mlir_modulemoduleget_entry_func_nameget_functionget_function_signature)
rB   rk   rY   backendr   r4   typesfn_namefuncOpfunc_tyr    r    r!   rD   \   s&   






zIRSource.__init__c                 C  s   t | jd S )NrM   )rP   rQ   rn   rR   rS   r`   r    r    r!   rW   u   s   zIRSource.hashc                 C  s   || j _| j S r_   )ru   rY   )rB   rZ   r[   r\   rY   r    r    r!   r^   x   s   zIRSource.make_irc                 C  s4   | j dkr| jd}|d usJ dd|iS t S )Nttgirzttg.num-warpsz'Unable to parse ttg.num-warps attribute	num_warps)r1   ru   get_int_attrr5   )rB   r   r    r    r!   ra   |   s
   
zIRSource.parse_optionsNrd   r    r    r    r!   rg   Z   s
    rg   c               
   C  s  dd l } tjtjtjt}g }ttd}|t|	 
 g7 }W d    n1 s0w   Y  tj|ddftj|ddfg}|D ]6\}}| j|g|dD ])}t|j|jjd}|t|	 
 g7 }W d    n1 syw   Y  qUqIt }tdd	d
 }	ttj|dd|	 d}	 |	d}
|
sn||
 qW d    n1 sw   Y  ||
  tj|d}| j|gddD ])}t|j|jjd}|t|	 
 g7 }W d    n1 sw   Y  qt d| S )Nr   rbcompilerztriton.compiler.r   ztriton.backends.)prefix
EXT_SUFFIX._Cz
libtriton.Ti   r0   ztriton.language.rJ   )pkgutilosrk   dirnameabspath__file__openrP   rQ   readrS   rO   walk_packagesmodule_finder	find_specr3   origin	sysconfigget_config_varr?   updateappendr
   )r   TRITON_PATHcontentsfpath_prefixesrk   r   liblibtriton_hashr1   chunklanguage_pathr    r    r!   
triton_key   sF   

r   c                 C  s   t jj| d S )Nmax_shared_mem)r   activeutilsget_device_properties)devicer    r    r!   r      s   r   c                 C  sj   |dks|dkrt | |}||_|S |dks|dks|dkr%t|  S |dks-|dkr3t|  S d S )Nr%   r~   llirr   amdgcncubinhsaco)r   rt   rY   r   rm   
read_bytes)	full_namer1   rY   ru   r    r    r!   parse   s   r   eBaseExceptionc                   s   t jjrdS | jdurt| j | jdurt| j ddg}dd |D }| j g } durEt fdd|D s>|   j	  dus.t
||dd D ]\}}||_	qN|s]d| _dS d|d	 _	|d
 | _dS )z
    Removes code_generator.py and related files from tracebacks.

    These are uninteresting to the user -- "just show me *my* code!"
    Nz"/triton/compiler/code_generator.pyz/ast.pyc                 S  s   g | ]	}| d tjqS )/)replacer   sep)r(   bad_filer    r    r!   rE      r,   z$filter_traceback.<locals>.<listcomp>c                 3  s$    | ]} j jj|r|V  qd S r_   )tb_framef_codeco_filenameendswith)r(   r   tbr    r!   	<genexpr>   s   " z#filter_traceback.<locals>.<genexpr>r   r   r   )r   compilationfront_end_debugging	__cause__filter_traceback__context____traceback__anyr   tb_nextzip)r   	BAD_FILESframes	cur_frame
next_framer    r   r!   r      s.   






r   c                   @  s4   e Zd ZdddZdddZdd	d
ZdddZdS )CompileTimerr#   r$   c                 C  s    t   | _d | _g | _d | _d S r_   )timestartir_initialization_endlowering_stage_endsstore_results_endr`   r    r    r!   rD      s   

zCompileTimer.__init__c                 C  s   t   | _d S r_   )r   r   r`   r    r    r!   finished_ir_initialization   s   z'CompileTimer.finished_ir_initialization
stage_namer9   c                 C  s   | j |t f d S r_   )r   r   r   )rB   r   r    r    r!   stage_finished   s   zCompileTimer.stage_finishedknobs.CompileTimesc                 C  s~   t   }| jd u r|| _n|| _d
dd}g }| j}| jD ]\}}|||||f |}qtj|| j| j|||| jd	S )Nr   floatendfloat | Noner#   intc                 S  s   |d u rdS t ||  d S )Nr   i@B )r   )r   r   r    r    r!   delta   s   zCompileTimer.end.<locals>.delta)ir_initializationlowering_stagesstore_results)r   r   r   r   r#   r   )r   r   r   r   r   r   CompileTimesr   )rB   	timestampr   lowering_stage_durationsstage_startr   	stage_endr    r    r!   r      s   


zCompileTimer.endNrc   )r   r9   r#   r$   )r#   r   )r2   re   rf   rD   r   r   r   r    r    r    r!   r      s
    


r   c           '      C  sf  t jj}|r	t }|d u rtj }t|tsJ dt	|}t| t
 }|r:t| ts0J dt }t| ||} |  }|t|pEt fi |}t }	t  d|   d|  d|  dtt|	  	}
t|
d }t|}t jj}t jj}t jj}|rt|  nd }|rt|  nd }| j d d }| d}|!|pi }|"|}t jj#}|s|d urt$| ||}|r|| |j%& ||' dd |S ||d	|j(|	}t)|d
< t }|*||| j+ t,|- .| j/}|r|d7 }t| tst }t0| |0| |1|}|2 }z
| 3||||}W n t4y6 } zt5|  d }~ww |rK| d| j/ }|6||||< n| d}|6||||< t jj7} |rp| rp|8| j9 t:d| j9  |rw|;  t,| |d  D ]\}!}"|"||}#| d|! }|d u r|"dd  }$r|$<d|! rt=|$|!|}#n|>| }%rt:d|%  t=|%|!|}#|r|!dv r|6|#|||< |d ur|6|#| | |!kr|>|}&|#8|& t:d|&  |#}|r|?|! q|j6t@jA|tBd|dd||< |C|| t jjDs|E  |r-|| |||' dd t$| ||S )Nz target must be of GPUTarget typez'source must be either AST or a filepathrJ   rM      .jsonT)rn   metadatametadata_grouptimes	cache_hit)rW   targettriton_versionr   r   z.sourcezCreating new locations for ir_overridez
Overriding kernel with file )r   r   json)defaultF)binary)Fr   r   listenerr   r   r   get_current_targetr8   r	   make_backendr"   r9   r   rY   rg   ra   r5   r   r   rW   rN   r7   rP   rQ   rR   rS   r   overridedump_irstore_binary_onlyr   r   r3   	get_groupgetalways_compileCompiledKernelr   _asdictr   __dict__r
   
add_stagesr0   listr@   r;   r1   ro   get_codegen_implementationget_module_mapr^   	Exceptionr   put
use_ir_loccreate_location_snapshotrk   printr   r   r   get_filer   r   dumpsvars	put_groupenable_asandisable_multithreading)'rn   r   rZ   compilation_listenertimerry   	ir_sourcerY   extra_optionsenv_varsrV   rW   fn_cache_managerenable_overrideenable_ir_dumpstore_only_binaryfn_override_managerfn_dump_manager	file_namemetadata_filenamer   metadata_pathr   resr   stagesfirst_stager[   r\   ru   r   ir_filenamer   r1   
compile_irnext_moduler   r   ir_full_namer    r    r!   compile  s   
:







$







r  r   r	   r#   r   c                   sN    fddt  D }t|dkr!tt| d j d| d|d  S )Nc                   s   g | ]}|j  r|j qS r    )r   supports_target)r(   r   r   r    r!   rE     s    z make_backend.<locals>.<listcomp>r   z! compatible backends for target (z) (z). There should only be one.r   )r   valueslenRuntimeErrorry   )r   activesr    r  r!   r     s   r   c                   @  s$   e Zd Zdd Zdd Zdd ZdS )LazyDictc                 C  s   || _ g | _d S r_   )dataextras)rB   r!  r    r    r!   rD     s   
zLazyDict.__init__c                 C  s0   | j D ]\}}| j|| B | _q| j   | jS r_   )r"  r!  clearrB   funcargsr    r    r!   r     s   
zLazyDict.getc                 C  s   | j ||f d S r_   )r"  r   r$  r    r    r!   add  s   zLazyDict.addN)r2   re   rf   rD   r   r'  r    r    r    r!   r     s    r   c                   @  s   e Zd Zdd ZdS )AsmDictc                 C  s.   |dkrt | d }ntd| || |< |S )Nsassr   zUnknown key: '%s')r   KeyError)rB   rV   valuer    r    r!   __missing__  s
   zAsmDict.__missing__N)r2   re   rf   r,  r    r    r    r!   r(    s    r(  c                      s<   e Zd Zdd Zdd Z fddZdd Zd	d
 Z  ZS )r   c                   s  ddl m} tdd | D }t| }t|d |d< |d }t|d |d |d	 |d< |d
t	t
| }|di || _t| jj}	|	| j| _|| _|| _| jj| _dd | D }
|	j t fdd|
D | _| j  | _d | _d | _d S )Nr   )
namedtuplec                 s  s&    | ]\}}| d rt|V  qdS )r   Nr   r   r(   cpr    r    r!   r     s   $ z*CompiledKernel.__init__.<locals>.<genexpr>cluster_dimsr   ry   arch	warp_sizeKernelMetadatac                 S  s"   g | ]\}}| d st|qS )r   r.  r/  r    r    r!   rE     s   " z+CompiledKernel.__init__.<locals>.<listcomp>c                   s:   i | ]}|j d d |j d d  kr| n| qS )r   N)rl   r   rm   )r(   file
binary_extr    r!   r+     s    ,z+CompiledKernel.__init__.<locals>.<dictcomp>r    )collectionsr-  nextr7   r   loadsrm   r<   r	   rN   r   r@   r   r   r   pack_metadatapacked_metadatarn   rW   r3   r8  r(  asmkernelru   function)rB   rn   r   rW   r-  r  r   r   r5  ry   	asm_filesr    r7  r!   rD     s*   


zCompiledKernel.__init__c                 C  s   | j d urd S tj }tj| j| j| _t|}| jj	|kr(t
| jj	|dt| jdrD| jjd urDd}| jj|krDt
| jj|dtjj| j| j| jj	|\| _ | _| _| _| _tj j}| jj| | jkrvt
| jj| | jdd S )Nzshared memory	tmem_sizei   ztensor memorythreads)ru   r   r   get_current_devicelauncher_clsrn   r   runr   sharedr   rG   rB  r   load_binaryr3   r?  r@  n_regsn_spillsn_max_threadsr   r4  r   )rB   r   
max_sharedmax_tmem_sizer4  r    r    r!   _init_handles  s$   

zCompiledKernel._init_handlesc                   s   |dkr|    t |S )NrF  )rN  super__getattribute__)rB   r3   	__class__r    r!   rP    s   zCompiledKernel.__getattribute__c           	      G  s   t jjd u rd S t| j| j|d}t| jtr| jj	j
d u r!|S i }d}t| jj	jD ]\}}|| ||< |d7 }q,|| jj	j
|| j|f |S )N)r3   r@  streamr   r   )r   runtimelaunch_enter_hookr   r3   r@  r8   rn   r"   r.   launch_metadatar>   r:   r'  r   )	rB   gridrS  r&  retarg_dictarg_idxiarg_namer    r    r!   rV    s   
zCompiledKernel.launch_metadatac                   s       d d fdd
}|S )N)rS  c              
     sp   | d u rt j }t j|} j | g|R  }j d  d  d | jj|tj	j
tj	jg	|R   d S )Nr   r   r   )r   r   rD  get_current_streamrV  rF  r@  r=  r   rT  rU  launch_exit_hook)rS  r&  r   rV  rW  rB   r    r!   runner  s   
"z*CompiledKernel.__getitem__.<locals>.runner)rN  )rB   rW  r`  r    r_  r!   __getitem__  s   zCompiledKernel.__getitem__)	r2   re   rf   rD   rN  rP  rV  ra  __classcell__r    r    rQ  r!   r     s    r   )r   r   rb   )r   r	   r#   r   )3
__future__r   rP   r   _C.libtritonr   r   r   backends.compilerr   r   r	   r   r
   r   runtime.autotunerr   runtime.cacher   r   r   runtime.driverr   tools.disasmr   pathlibr   r   	functoolsr   r   r   ptx_prototype_patternrp   ptx_arg_type_patternrs   r   r"   rg   	lru_cacher   r   r   r   r   r  r   r   r5   r(  r   r    r    r    r!   <module>   sP    
&*
#

&
' 
