o
    hM                     @   s   d dl mZmZmZ d dlmZmZmZmZ d dl	m
Z
 d dlmZ d dlmZmZmZ d dlmZ d dlZd dlZd dlZd dlZd dlZd dlmZ d	efd
dZdd Zdd ZeddG dd dZG dd deZdS )    )BaseBackend	GPUTargetLanguage)irpassesllvmamd)knobs)	dataclass)AnyDictTuple)
ModuleTypeN)Pathtargetc                 C   s   dd S )Nc                 S   s   dS )N   r   r    )lhs_typerhs_typer   r   W/var/www/html/scripts/venv/lib/python3.10/site-packages/triton/backends/amd/compiler.py<lambda>   s    z"get_min_dot_size.<locals>.<lambda>r   r   r   r   r   get_min_dot_size   s   r   c                 C      t jjd u r
| dkS t jjS Ngfx942)r	   r   use_block_pingpongarchr   r   r   is_pingpong_schedule_enabled      r    c                 C   r   r   )r	   r   use_in_thread_transposer   r   r   r   is_in_thread_transpose_enabled   r!   r#   T)frozenc                   @   s&  e Zd ZU dZeed< dZeed< dZeed< dZeed< dZ	e
ed	< d
Zeed< dZeed< dZeed< dZeed< dZee ed< dZee ed< dZeed< dZee ed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< d Zeed!< d"Zeed#< d$d% Zd&d' Z dS )(
HIPOptions   	num_warpsr   waves_per_eu   
num_stagesnum_ctasNextern_libsr   cluster_dimsFdebugTsanitize_overflowr   )fp8e5supported_fp8_dtypesr   !deprecated_fp8_dot_operand_dtypesieeedefault_dot_input_precision)r3   allowed_dot_input_precisionsenable_fp_fusionlaunch_cooperative_gridr   matrix_instr_nonkdimkpackallow_flush_denormmax_num_imprecise_acc_defaulthipbackend_namenoneschedule_hintc                 C   s   t | jdd }|dkrdnd}t| d| | jdkr'| j| jd @ dks+J d	| jd
kr9| jdks9J dttjd }| j	d u rGi nt
| j	}dD ]}t|| d ||< qNt| dt|  d S )N   
       @   	warp_sizer   r   znum_warps must be a power of 2gfx950zgfx950 only accepts kpack == 1lib)ocmlocklz.bcr,   )intr   object__setattr__r'   r9   r   __file__parentr,   dictstrtupleitems)self	gfx_majorrE   default_libdirr,   rG   r   r   r   __post_init__E   s    
zHIPOptions.__post_init__c                 C   s.   d dd | j D }t|d S )N_c                 S   s   g | ]\}}| d | qS )-r   ).0namevalr   r   r   
<listcomp>V   s    z#HIPOptions.hash.<locals>.<listcomp>utf-8)join__dict__rR   hashlibsha256encode	hexdigest)rS   keyr   r   r   hashU   s   zHIPOptions.hash)!__name__
__module____qualname__r'   rJ   __annotations__r(   r*   r+   r,   rO   r-   rQ   r.   boolr/   r   rP   r1   r   r2   r4   r5   r6   r7   r8   r9   r:   r;   r=   r?   rV   re   r   r   r   r   r%      s0   
 r%   c                       s  e Zd ZedefddZdeddf fddZdefdd	Zde	fd
dZ
dd Zdd Zdeeef fddZdd Zedd Zedd Zedd Zedd Zedd Zedd Zed d! Zed"d# Zed$d% Zed&d' Zd(d) Ze d*d+ Z  ZS ),
HIPBackendr   c                 C   s
   | j dkS )Nr<   )backendr   r   r   r   supports_target\   s   
zHIPBackend.supports_targetreturnNc                    s&   t  | t|jtsJ d| _d S )Nhsaco)super__init__
isinstancer   rP   
binary_ext)rS   r   	__class__r   r   rq   `   s   
zHIPBackend.__init__c                 C   s   d|j  S )Nhip:r   rS   optionsr   r   r   get_target_namee   s   zHIPBackend.get_target_namec                    s   dt jjp| jji}| jjdkr#ttj}|dh t	t
||d< d vr]ttj}| jjdkr:|h d n| jjdkrH|dd	h nd
| jjv rU|dd	h t	t
||d< d vrgt jj|d< | fddtj D  tdi |S )Nr   r   tf32r5   r1   >   fp8e4b8fp8e4nvfp8e5b16rF   r|   r0   gfx12r6   c                    s*   i | ]}| v r | d ur| | qS Nr   )rY   koptsr   r   
<dictcomp>}   s    z,HIPBackend.parse_options.<locals>.<dictcomp>r   )r	   runtimeoverride_archr   r   setr%   r5   updaterQ   sortedr1   languagedefault_fp_fusion__dataclass_fields__keys)rS   r   argsr5   r1   r   r   r   parse_optionsh   s$   

zHIPBackend.parse_optionsc                 C   s(   |j |j|j|jd |jd |jd fS )Nr   r   r)   )r'   r+   sharedr-   )rS   metadatar   r   r   pack_metadata   s   zHIPBackend.pack_metadatac                 C   s   dt | jiS )Nmin_dot_size)r   r   rw   r   r   r   get_codegen_implementation      z%HIPBackend.get_codegen_implementationc                 C   s   ddl m} d|iS )Nr   )	libdeviceztriton.language.extra.libdevice)triton.language.extra.hipr   )rS   r   r   r   r   get_module_map   s   zHIPBackend.get_module_mapc                 C   s   t | d S r   )r   load_dialects)rS   ctxr   r   r   r      r   zHIPBackend.load_dialectsc                 C   sL   dd l }d}t| dr|  |kS t| |jr$t| dr$|   |kS dS )Nr   i	ptr_rangeuntyped_storageF)torchhasattrr   rr   Tensorr   size)argr   
MAX_INT_32r   r   r   is_within_2gb   s   
zHIPBackend.is_within_2gbc                 C   s$   t | }d| v r|ddgg7 }|S )NSztt.pointer_rangerC   )r   
parse_attr)descretr   r   r   r      s   
zHIPBackend.parse_attrc                 K   s:   t j| |fi |}tjjr|dkrt| r|d7 }|S )Ntensorr   )r   get_arg_specializationr	   r   use_buffer_opsrk   r   )r   tykwargsr   r   r   r   r      s   z!HIPBackend.get_arg_specializationc                  C   sn   t jj} | d urt| }| r|S ttjd }| r|S td}| r)|S td}| r3|S td)Nzllvm/bin/ld.lldz/opt/rocm/llvm/bin/ld.lldz/usr/bin/ld.lldzWROCm linker /opt/rocm/llvm/bin/ld.lld not found. Set 'TRITON_HIP_LLD_PATH' to its path.)r	   r   lld_pathr   is_filerM   rN   	Exception)lld_env_pathlldr   r   r   path_to_rocm_lld   s   zHIPBackend.path_to_rocm_lldc                 C   s   t | j}|  tj| tj| tj	| tj
| tj| tj| tj| tj| tj| tj| ||  | S r   )r   pass_managercontextenable_debugr   commonadd_inlinerttiradd_rewrite_tensor_pointer(add_rewrite_tensor_descriptor_to_pointeradd_canonicalizeradd_combineadd_reorder_broadcastadd_cseadd_triton_licmadd_symbol_dceadd_loop_unrollrun)modr   rx   pmr   r   r   	make_ttir   s   
zHIPBackend.make_ttirc                 C   s  t | j}|  tj|d|j |j|j	|j
 ||  t | j}|  tj| tj| tj| tjj||j|j|j tj| tjj| tj|d tjj| tj| tj| tj| tj| tjj}tjj}tjj}|jdkrd }}tjj ||j!||| |rtjj"||j tj| |j# dkrtjj$||j tj|d tj| tj%| t&|jrtjj'| tj| tjj(| t)|j}|r|j!dkrtjj*||j! tjj+rtjj,| tj| tjj-||j tjj.| tj| tj/| tj0| |r?tjj1||j ||  | S )Nrv   Tzlocal-prefetchr   r>   r)   )2r   r   r   r   r   r   add_convert_to_ttgpuirr   r'   rE   r+   r   ttgpuiradd_coalesceadd_remove_layout_conversionsadd_optimize_thread_localityr   add_accelerate_matmulr8   r9   add_optimize_epilogueadd_optimize_dot_operandsadd_hoist_layout_conversionsadd_fuse_nested_loopsr   r   r   r	   global_prefetchlocal_prefetchuse_async_copyr?   add_stream_pipeliner*   add_coalesce_async_copylowerinsert_instruction_sched_hintsadd_reduce_data_duplicationr#   add_in_thread_transposeadd_reorder_instructionsr    add_block_pingpongr   add_canonicalize_pointersadd_convert_to_buffer_opsadd_fold_true_cmpir   r   add_update_async_wait_count)r   r   rx   r   r   r   r   r   r   r   r   
make_ttgir   sj   





zHIPBackend.make_ttgirc                 C   sb   | }t |j}|  tj| tj| tj	
| tj| tj| || |S r   )r   r   r   r   r   r   r   r   add_sccpr   add_loop_aware_cser    add_combine_tensor_select_and_ifr   )srcr   rx   r   r   r   r   r   	ttgir_opt  s   
zHIPBackend.ttgir_optc                    s&  | }t |j}|  d}tjj||j| tj	
| tj	| tj| d}tjj||j| tj| tj| tj	| tj	| tj| tj| tj| |j dkrrtjj||j|j tjjs|tj| tjj|| || t   t }t!|| t"  d}tjj#rd}t$ tj%|j| t& |j t' d t( dd t( d	d t( d
d t( d|j)dk dd  * D }	|	d +tj, |	d -dd|j.|j)   |	d -d|j/  |j0r
dnd}
|	d -d|
 tjj#r&|	d 1d |	d 2  t3|	d  tjj#rQt4t5j6d }t7|d t7|d t7|d g}t8 | n|j9re fdd|j9D }t8 | t: tj;|jdg |j< tjj=r~t>|	d  | ?d|d< t@  tA  t7 S )Nr   Tr>    +xnacki  __oclc_finite_only_optF__oclc_correctly_rounded_sqrt32__oclc_unsafe_math_opt__oclc_wavefrontsize64rD   c                 S   s   g | ]}|  s|qS r   )is_declaration)rY   fnr   r   r   r\   `  s    z(HIPBackend.make_llir.<locals>.<listcomp>zamdgpu-flat-work-group-sizez1,zamdgpu-waves-per-euzpreserve-signr3   zdenormal-fp-math-f32rG   z
asanrtl.bczocml.bczockl.bcc                    s    g | ]\}}t  |r|qS r   )r   need_extern_lib)rY   rZ   pathllvm_modr   r   r\     s     z
ttg.sharedr   )Br   r   r   r   r   r   r   add_optimize_lds_usager   convertadd_scf_to_cfadd_index_to_llvmiradd_allocate_shared_memoryadd_to_llvmirr   r   r   add_cf_to_llvmiradd_arith_to_llvmirr   r?   r   lower_instruction_sched_hintsr*   r	   compilationdisable_line_infollvmiradd_di_scopeadd_builtin_func_to_llvmirr   r   init_targets	to_moduleattach_target_tripleenable_asanattach_datalayoutTARGET_TRIPLEset_isa_versionset_abi_versionset_bool_control_constantrE   get_functionsset_calling_convCALLING_CONV_AMDGPU_KERNELadd_fn_attrr'   r(   r:   add_fn_target_featureadd_fn_asan_attrset_all_fn_arg_inregr   rM   rN   rP   link_extern_libsr,   optimize_moduleOPTIMIZE_O3r6   scalarize_packed_fops#add_scalarize_packed_fops_llvm_passget_int_attrcleanup_bitcode_metadatadisable_print_inline)r   r   rx   r   r   custom_lds_size_HIPBackend__HIP_FTZr   target_featuresfnsdenormal_moderU   pathsr   r   r   	make_llir#  s   









zHIPBackend.make_llirc              	   C   sx   t d| }t|dksJ |d |d< g }|jdkr |d t| tj|j	d||j
d}tjjr:td	 t| |S )
Nz3define amdgpu_kernel void @([a-zA-Z_][a-zA-Z0-9_]*)r   r   rZ   	attentionzsink-insts-to-avoid-spillsr   Fz!// -----// AMDGCN Dump //----- //)refindalllenr?   appendr   translate_to_asmr   r  r   r6   r	   dump_amdgcnprint)r   r   rx   namesflagsamdgcnr   r   r   make_amdgcn  s   

zHIPBackend.make_amdgcnc                 C   s  d}t jjrd}t| |j|}t }t	 h}t	 1}t
|jd}|| W d    n1 s4w   Y  t|ddd|jd|jg W d    n1 sQw   Y  t
|jd}	|	 }
W d    n1 skw   Y  W d    |
S W d    |
S 1 sw   Y  |
S )	Nr   r   wbz-flavorgnuz-sharedz-orb)r	   r   r  r   assemble_amdgcnr   rk   r   tempfileNamedTemporaryFileopenrZ   write
subprocess
check_callread)r   r   rx   r  ro   	rocm_pathtmp_outtmp_infd_infd_outr   r   r   r   
make_hsaco  s,   




zHIPBackend.make_hsacoc                    s   |t jkr fdd|d<  fdd|d< n|t jkr& fdd|d<  fdd|d<  fd	d|d
<  fdd|d< d S )Nc                        | | S r   )r   r   r   rx   rS   r   r   r         z'HIPBackend.add_stages.<locals>.<lambda>r   c                    r:  r   )r   r;  r<  r   r   r     r=  ttgirc                    r:  r   )r   r;  r<  r   r   r     r=  c                    r:  r   )r  r;  r<  r   r   r     r=  llirc                    r:  r   )r(  r;  r<  r   r   r     r=  r'  c                    r:  r   )r9  r;  r<  r   r   r     r=  ro   )r   TRITONGLUON)rS   stagesrx   r   r   r<  r   
add_stages  s   

zHIPBackend.add_stagesc                 C   s&   t jt dgdd}| d| j S )Nz	--versionr]   )encodingrX   )r1  check_outputrk   r   r   )rS   versionr   r   r   re     s   zHIPBackend.hash) rf   rg   rh   staticmethodr   rm   rq   rP   ry   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r(  r9  rC  	functools	lru_cachere   __classcell__r   r   rt   r   rk   Z   sB    







=

m


rk   )triton.backends.compilerr   r   r   triton._C.libtritonr   r   r   r   tritonr	   dataclassesr
   typingr   r   r   typesr   r`   r-  r  r1  rH  pathlibr   r   r    r#   r%   rk   r   r   r   r   <module>   s$    <