o
    hz                    @  s   d dl mZ d dlZd dlmZmZmZmZmZm	Z	m
Z
 d dlZd dlmZ ddlmZ ddlmZ ed	Zed
ZG dd deZG dd de	e ZdS )    )annotationsN)ListOptionalSequenceTupleTypeVarGenericType)driver   )ir   )coreTTensorTyc                      s   e Zd Z fddZ  ZS )IncompatibleTypeErrorImplc                   s@   || _ || _d| j   d | j  | _tt| | j d S )Nzinvalid operands of type  and )type_atype_b__repr__messagesuperr   __init__)selfr   r   	__class__ S/var/www/html/scripts/venv/lib/python3.10/site-packages/triton/language/semantic.pyr      s   z"IncompatibleTypeErrorImpl.__init__)__name__
__module____qualname__r   __classcell__r   r   r   r   r      s    r   c                   @  s  e Zd ZU ejZded< eZded< dd ZdSddZdSddZ	dTddZ
dUddZdVdWddZdXd"d#Z		$dYdZd)d*Zd[d-d.Zd\d2d3Zd\d4d5Zd\d6d7Zd]d8d9Zd]d:d;Zd^d=d>Zd]d?d@Zd_dEdFZd_dGdHZd`dKdLZdadMdNZdbdOdPZdbdQdRZdbdSdTZdbdUdVZdbdWdXZdcdYdZZ dbd[d\Z!dbd]d^Z"dbd_d`Z#dddadbZ$dddcddZ%dddedfZ&dedidjZ'dbdkdlZ(dbdmdnZ)dbdodpZ*dbdqdrZ+dbdsdtZ,dbdudvZ-dwdxdfd|d}Z.dgddZ/dgddZ0dhddZ1diddZ2djddZ3dkddZ4dlddZ5dmddZ6dnddZ7doddZ8dpddZ9dqddZ:drddZ;dsddZ<dtduddZ=dd Z>dd Z?dd Z@dd ZAdd ZBdd ZCdd ZDdd ZEdd ZFdvddǄZGdwdd˄ZHdxdd̈́ZIdyddτZJdyddфZKddӄ ZLddՄ ZMdyddׄZNdyddلZOdyddۄZPdydd݄ZQdydd߄ZRdzddZSd{ddZTdd ZUdd ZVd|ddZWd}ddZXd~ddZYdddZZdddZ[dddZ\dddZ]dddZ^dddZ_dd dZ`dddZadd Zbdd
dZcdddZddddZedddZfdddZgdd Zhdd#d$Zidd&d'Zjdd*d+Zkdd-d.Zldd0d1Zmdd2d3Zndd4d5Zodd6d7Zpdd<d=Zqdd@dAZrddBdCZsdDdE ZtdVdFdGZuddIdJZvddKdLZwddQdRZxdwS (  TritonSemanticzType[TensorTy]tensorz
ir.builderbuilderc                 C  s
   || _ d S N)r$   )r   r$   r   r   r   r      s   
zTritonSemantic.__init__axisintreturnr   c                 C  ,   |dvrt d| | | j|tjS )Nr   r   r   z+program_id axis must be 0, 1, or 2 but got )
ValueErrorr#   r$   create_get_program_idtlint32r   r&   r   r   r   
program_id&      zTritonSemantic.program_idc                 C  r)   )Nr*   z-num_programs axis must be 0, 1, or 2 but got )r+   r#   r$   create_get_num_programsr-   r.   r/   r   r   r   num_programs+   r1   zTritonSemantic.num_programsa_tytl.dtypeb_tyc                 C  s   |j }|j }|j}|j}||kr||kr|S |S |tjjjkr'||kr%|S |S |tjjjkr6||kr4|S |S td| d| )Nzunexpected signedness r   )int_bitwidthint_signednessr-   dtype
SIGNEDNESSUNSIGNED	TypeError)r   r4   r6   a_rankb_ranka_snb_snr   r   r   integer_promote_impl4   s   z#TritonSemantic.integer_promote_impla_is_scalarboolb_is_scalar
div_or_modc                 C  sV  ||kr)|r
||fn||f\}}|  j|  jkr)|r'|tjtjfv r'tjS |S | s1| r4tjS | s<| r?tjS |	 sG|	 rO|rLtjS tjS |
 r_|
 r_|r\tjS tjS |
 sg|
 rjtjS | r{| r{||krx|S tjS | r| std| d| |r|j|jkrtd|  d |  d | ||S )Nunexpected type r   zCannot use /, #, or % with x because they have different signedness;this is unlikely to result in a useful answer. Cast them to the same signedness.)kindvaluer-   float16bfloat16float32is_fp64float64is_fp32is_fp16is_bf16is_fp8is_intr<   r8   r   rA   )r   r4   rB   r6   rD   rE   	scalar_ty	tensor_tyr   r   r   computation_type_implC   s:   z$TritonSemantic.computation_type_implT
check_typec                 C  s  t |tr| | j|tjS t |trdd|  krdk r%n ntj}n8d|  kr/dk r5n ntj	}n(d|  kr?dk rEn ntj
}nd|  krOdk rUn ntj}ntd| d| j||d	S t |trd
}ddd  }td |}|tdks|dks||ks||  kr|krn ntj}ntj}| j||d	S t |tjr| |jS t || jr|S |rtd| dt| d|S )N           l                             l            zNonrepresentable integer .r9   g      8g   ?r      absinfg        zcannot convert z	 of type z
 to tensor)
isinstancerC   r#   r$   get_int1r-   int1r'   r.   uint32int64uint64r+   scalar_constantfloat__builtins__rL   rN   	constexpr	to_tensorrI   r<   type)r   xrW   r9   min_float32max_float32abs_xr   r   r   rk   u   s>   


zTritonSemantic.to_tensorr   r   allow_ptr_aNonec                 C  sJ   |  r!|st|||  r||krt||| r#t||d S d S r%   )is_ptrr   is_floating)r   r   r   rq   r   r   r   check_ptr_type_impl   s   


z"TritonSemantic.check_ptr_type_implFlhsTensorTy | numbers.NumberrhsTuple[TensorTy, TensorTy]c                 C  s  t |tj}t |tj}|r|}	| |}|r|}
| |}|jj}|jj}| ||| | ||| |r| s| s| |||||}|rQ|	dk rQ|	 s[|r_|
dk r_|	 r_t
d| r|r~| |	  krs| ks~n t
d|	 d| |r| |
  kr| ksn t
d|
 d| |r| j|	|dn| ||}|r| j|
|dn| ||}| ||\}}||fS )Nr   z{Cannot perform a binary operation between an unsigned tensor and a negative scalar. Perform a explicit cast on one of them.zScalar z is out of range for type r]   )ra   numbersNumberrk   rl   scalarru   rs   rV   is_int_unsignedr+   rS   get_int_min_valueget_int_max_valuerg   castbroadcast_impl_value)r   rv   rx   allow_lhs_ptrallow_rhs_ptrarithmetic_checkrE   lhs_is_scalarrhs_is_scalar
lhs_scalar
rhs_scalar
lhs_sca_ty
rhs_sca_ty
ret_sca_tyr   r   r   binary_op_type_checking_impl   s@   

z+TritonSemantic.binary_op_type_checking_impl	binary_opcallablec                 C  s   |j jjdks| jjjsd S |j j}|j j}||ksJ | s"J | |tj	}| |tj	}|||d}|
 }| |tj	}| }| |tj	}| | ||| ||}	d|j d|j }
| |	|
 d S )N@   Fr'   z! overflow detected for operation )rl   r|   r7   r$   optionssanitize_overflowrS   r   r-   re   r   rg   r~   and_
less_equalgreater_equalr   device_assert)r   rv   rx   r   r   r   ret	max_value	min_valuecondmsgr   r   r    binary_op_sanitize_overflow_impl   s    z/TritonSemantic.binary_op_sanitize_overflow_implinputotherr   c                 C  s6  |  ||dd\}}|jj}|jj}| r| rtd| r3| s3||}}|jj}|jj}| rf|j}|j rY|jjdk rY|j	t
j| j}| j|j|d}| | j|j||jS | rx| | j|j|j|jS | r|r| ||| j | | j|j|j|jS td| )NTzcannot add pointers togetherr   FrF   )r   rl   r|   rs   r<   handler9   r}   r7   with_element_tyr-   re   to_irr$   create_int_castr#   create_addptrrt   create_faddrS   r   add
create_add)r   r   r   r   input_scalar_tyother_scalar_tyother_handlei64_tyr   r   r   r      s,   
zTritonSemantic.addc                 C  s   |  ||dd\}}|jj}| r| j|| |ddS | r/| | j	|j
|j
|jS | rK|r=| ||| j | | j|j
|j
|jS td| )NTF)r   rF   )r   rl   r|   rs   r   minusrt   r#   r$   create_fsubr   rS   r   sub
create_subr<   r   r   r   r   rT   r   r   r   r      s   zTritonSemantic.subc                 C  s   |  ||\}}|jj}| r| | j|j|j|jS | r:|r,| 	||| j
 | | j|j|j|jS td| NrF   )r   rl   r|   rt   r#   r$   create_fmulr   rS   r   mul
create_mulr<   r   r   r   r   r     s   zTritonSemantic.mulc                 C  s   |  ||dddd\}}|jj}|jj}| r#| r#| ||}nI| r2| r2| ||}n:| rI| rI| |tj}| |tj}n#| re| re|j|jkr^| ||}n| ||}nt	d| | 
| j|j|j|jS NFTrF   )r   rl   r|   rt   rS   r   r-   rL   fp_mantissa_widthr<   r#   r$   create_fdivr   )r   r   r   r   r   r   r   r   truediv  s    zTritonSemantic.truedivc                 C  s   |  ||dddd\}}|jj}|jj}| rN| rN| ||}| ||}| ||}| r@| | j	|j
|j
|jS | | j|j
|j
|jS td| r   )r   rl   r|   rS   rA   r   is_int_signedr#   r$   create_sdivr   create_udivr<   )r   r   r   r   r   ret_tyr   r   r   floordiv7  s   zTritonSemantic.floordivieee_roundingc                 C  s`   |j j}|j j}| r| std| ||dddd\}}| j|j|j}| ||j S )Nz4both operands of fdiv must have floating scalar typeFT)	rl   r|   rt   r<   r   r$   r   r   r#   )r   r   r   r   r   r   r   r   r   r   fdivE  s   zTritonSemantic.fdivc                 C  s   |  ||dddd\}}|jj}|jj}| r&| | j|j|j|jS | r`|j	|j	kr@t
d|  d |  d | rR| | j|j|j|jS | | j|j|j|jS t
d| )NFTzCannot mod z by rG   rF   )r   rl   r|   rt   r#   r$   create_fremr   rS   r8   r<   r   r   create_sremcreate_urem)r   r   r   rT   r   r   r   r   modN  s    zTritonSemantic.modrm   ypropagate_nantl.PropagateNanc                 C     |  ||\}}|j}| r>|tjjkr#| | j|j	|j	|j
S |tjjkr7| | j|j	|j	|j
S td| | rP| | j|j	|j	|j
S | rb| | j|j	|j	|j
S td| NzUnexpected propagate_nan Unexpected dtype )r   r9   rt   r-   PropagateNanALLr#   r$   create_minimumfr   rl   NONEcreate_minnumfr+   r   create_minsir}   create_minuir<   r   rm   r   r   r9   r   r   r   minimume     zTritonSemantic.minimumc                 C  r   r   )r   r9   rt   r-   r   r   r#   r$   create_maximumfr   rl   r   create_maxnumfr+   r   create_maxsir}   create_maxuir<   r   r   r   r   maximumv  r   zTritonSemantic.maximumminmaxc                 C  sp   |  ||\}}|  ||\}}|  ||\}}|j}| r0| | j|j|j|j||jS td| d)Nr   z(. Only floating point clamp is supported)	r   r9   rt   r#   r$   create_clampfr   rl   r<   )r   rm   r   r   r   r9   r   r   r   clamp  s   "zTritonSemantic.clampc                 C  sv   |  ||\}}|jj}|jj}| r| st||| ||}||kr-| ||}||kr7| ||}||fS r%   )r   rl   r|   rS   r   rA   r   )r   r   r   input_sca_tyother_sca_tyr   r   r   r   bitwise_op_type_checking_impl  s   
z,TritonSemantic.bitwise_op_type_checking_implc                 C  ,   |  ||\}}| | j|j|j|jS r%   )r   r#   r$   
create_andr   rl   r   r   r   r   r   r   r        zTritonSemantic.and_c                 C  r   r%   )r   r#   r$   	create_orr   rl   r   r   r   r   or_  r   zTritonSemantic.or_c                 C  r   r%   )r   r#   r$   
create_xorr   rl   r   r   r   r   xor_  r   zTritonSemantic.xor_c                 C  <   |j  s| |tj}|j  s| |tj}| ||S r%   )rl   is_int1bitcastr-   rc   r   r   r   r   r   logical_and  
   

zTritonSemantic.logical_andc                 C  r   r%   )rl   r   r   r-   rc   r   r   r   r   r   
logical_or  r   zTritonSemantic.logical_orc                 C  s"   |j  s| |tj}| |S r%   )rl   r   r   r-   rc   invertr   r   r   r   r   not_  s   

zTritonSemantic.not_c                 C  r   r%   )r   r#   r$   create_lshrr   rl   r   r   r   r   lshr  r   zTritonSemantic.lshrc                 C  r   r%   )r   r#   r$   create_ashrr   rl   r   r   r   r   ashr  r   zTritonSemantic.ashrc                 C  r   r%   )r   r#   r$   
create_shlr   rl   r   r   r   r   shl  r   zTritonSemantic.shlc                 C  s   |S r%   r   r   r   r   r   plus  s   zTritonSemantic.plusc                 C  sN   |j j}| rtd|  d | | j|| j|}| 	||dS )Nz$wrong type argument to unary minus ()T)
rl   r|   rs   r+   r   r#   r$   get_null_valuer   r   )r   r   r   _0r   r   r   r     s
   zTritonSemantic.minusc                 C  sT   |j j}| s| rtd|  d | | j|	| j|}| 
||S )Nz%wrong type argument to unary invert (r   )rl   r|   rs   rt   r+   r   r#   r$   get_all_ones_valuer   r   )r   r   r   _1r   r   r   r     s
   zTritonSemantic.invertvtl.block_typec                 C  s   |j tjS r%   )rl   r   r-   rc   )r   r   r   r   r   
_bool_like  s   zTritonSemantic._bool_likec                 C     |  ||\}}|jj}| r | | j|j|j| |S |	 rH|
 r8| | j|j|j| |S | | j|j|j| |S td| r   )r   rl   r|   rt   r#   r$   create_fcmpOGTr   r   rS   r   create_icmpSGTcreate_icmpUGTr<   r   r   r   rT   r   r   r   greater_than        zTritonSemantic.greater_thanc                 C  r   r   )r   rl   r|   rt   r#   r$   create_fcmpOGEr   r   rS   r   create_icmpSGEcreate_icmpUGEr<   r  r   r   r   r     r  zTritonSemantic.greater_equalc                 C  r   r   )r   rl   r|   rt   r#   r$   create_fcmpOLTr   r   rS   r   create_icmpSLTcreate_icmpULTr<   r  r   r   r   	less_than  r  zTritonSemantic.less_thanc                 C  r   r   )r   rl   r|   rt   r#   r$   create_fcmpOLEr   r   rS   r   create_icmpSLEcreate_icmpULEr<   r  r   r   r   r     r  zTritonSemantic.less_equalc                 C  v   |  ||\}}|jj}| r | | j|j|j| |S |	 r4| | j
|j|j| |S td| r   )r   rl   r|   rt   r#   r$   create_fcmpOEQr   r   rS   create_icmpEQr<   r  r   r   r   equal"       zTritonSemantic.equalc                 C  r  r   )r   rl   r|   rt   r#   r$   create_fcmpUNEr   r   rS   create_icmpNEr<   r  r   r   r   	not_equal-  r  zTritonSemantic.not_equalN)r   startendr   c          	      C  s   t |tr
t |tstdt|d? }t|d? }|s|r"td||kr*td|| }||d @ dkr:td|g}|d u rHttj|}|| j}| 	| j
||||S )Nz/arange's arguments must be of type tl.constexpr    zarange must fit in int32z=arange's end argument must be greater than the start argumentr   r   z#arange's range must be a power of 2)ra   r'   r+   rC   r-   
block_typer.   r   r$   r#   create_make_range)	r   r  r  r   is_start_int64is_end_int64rangeshape	ret_ty_irr   r   r   arange<  s    zTritonSemantic.aranger9   c                 C  sV   |d u rt d|dkr| j|| j}nt| jd|j }||}| ||S )Nz2dtype must be specified when value is not a tensorr   get_)r+   r$   r   r   getattrnamer#   )r   rI   r9   get_value_fnr   r   r   rg   N  s   zTritonSemantic.scalar_constantc                 C  s8   t |tjr|jjdksJ d| ||S | ||S )Nr   zonly accepts size-1 tensor)ra   r-   r#   numelrI   r   rg   )r   rI   r9   r   r   r   make_scalarY  s   zTritonSemantic.make_scalarr  	List[int]c                 C  s   |  | |||S r%   )splatr'  )r   r  rI   r9   r   r   r   full`     zTritonSemantic.fullrI   c                 C  sP   |j  r	J dt|dkr|S t|j|}| | j|	| j|j
|S )NzCannot splat a block tensorr   )rl   is_blocklenr-   r  r9   r#   r$   create_splatr   r   )r   rI   r  r   r   r   r   r)  g  s
    zTritonSemantic.splat	dst_shapecan_reorderc                 C  sT   d}|D ]}||9 }q|j j|krtdt|j j|}| | j|j	|||S )Nr   z:reshape() cannot change total number of elements in tensor)
rl   r&  r+   r-   r  r|   r#   r$   create_reshaper   )r   r   r/  r0  r&  sr   r   r   r   reshapen  s   
zTritonSemantic.reshapec                 C  s\   dd |j D }||d |j s| j||dS t|jj|}| | j	
|j||S )Nc                 S  s   g | ]}t |qS r   r-   _unwrap_if_constexpr.0rm   r   r   r   
<listcomp>x      z.TritonSemantic.expand_dims.<locals>.<listcomp>r   r  )r  insertrl   r,  r)  r-   r  r|   r#   r$   create_expand_dimsr   )r   r   r&   r/  r   r   r   r   expand_dimsw  s   
zTritonSemantic.expand_dimsc                 C  sZ   |sJ dt |jdksJ t|jj|jd |jd  g}| | j|j	|j	|S )Nz;current implementation of `cat` always may reorder elementsr   r   )
r-  r  r-   r  rl   r|   r#   r$   
create_catr   )r   rv   rx   r0  ret_typer   r   r   cat  s   "zTritonSemantic.catabc                 C  s   |  ||\}}|jg k}|r| |d}| |d}t|jd tjr*td}nd}|j|g }t|jj|}| 	| j
|j|j|}|rR| j|dgdd}|S )Nr   r   Fr0  )r   r  r=  ra   r-   rj   r  rl   r|   r#   r$   create_joinr   r3  )r   rA  rB  
was_rank_1two	new_shaper?  r   r   r   r   join  s   
zTritonSemantic.joinc                 C  sr   t |jdks	J t|jd dksJ |jd d }t|jj|}| j|j	\}}| 
||| 
||fS )Nr   rC  r   )r-  r  r-   r5  r  rl   r|   r$   create_splitr   r#   )r   rA  rH  r?  outLHSoutRHSr   r   r   split  s   

zTritonSemantic.splitdims
Tuple[int]c                   s   t  jt |krtdtdd |D ttt |kr%td| t jj	 fdd|D }| 
| j j||S )Nz5permute dims must have the same length as input shapec                 s  s    | ]}t |V  qd S r%   r4  r7  dr   r   r   	<genexpr>  s    z)TritonSemantic.permute.<locals>.<genexpr>z?permute dims must be a permutation of 0, 1, ..., n-1, but were c                   s   g | ]} j | qS r   r:  rP  r   r   r   r8    r9  z*TritonSemantic.permute.<locals>.<listcomp>)r-  r  r+   sortedlistr  r-   r  rl   r|   r#   r$   create_transr   )r   r   rN  r?  r   rS  r   permute  s   "zTritonSemantic.permutec                 C  s   |j  s| ||S |j  }t|t|kr"td| d| ||kr(|S t|D ]#\}}|| |krO|dkrOtd||  d| d| d| d| 
q,t|j j	|}| 
| j|j||S )Nz!Cannot broadcast, rank mismatch: z, r   z3Cannot broadcast, the expanded size of the tensor (z ) must match the existing size (z) at non-singleton dimension : )rl   r,  r)  get_block_shapesr-  r+   	enumerater-   r  r|   r#   r$   create_broadcastr   )r   r   r  	src_shapeiitemr   r   r   r   broadcast_impl_shape  s*   

z#TritonSemantic.broadcast_impl_shapec              	   C  sj  |j }|j }| r(| s(||j}| | j|| j|j|}||fS | sJ| rJ||j}| | j|| j|j|}||fS | r1| r1|	 }|	 }t
|t
|k rtt
|t
|D ]}| | j|jdt|jdg|j }|j }|	 }qmn0t
|t
|k rtt
|t
|D ]}| | j|jdt|jdg|j }|j }|	 }qt
|t
|ksJ g }t|D ]3\}	}
||	 }|
dkr|| q|dks||
kr||
 qtdt|	 d t|
 d t| ||krt|j|}| | j|j||}||kr1t|j|}| | j|j||}||fS )Nr   r   z?Cannot make_shape_compatible: incompatible dimensions at index rX  r   )rl   r,  r   r|   r#   r$   r.  r   r   rY  r-  r  r<  r-   r  valuesrZ  appendr+   strr[  )r   rv   rx   lhs_tyrhs_ty	lhs_shape	rhs_shape_	ret_shaper]  leftrightr   r   r   r   r     sl    + '



z#TritonSemantic.broadcast_impl_valuerounding_modeOptional[str]c                 C  s<   |d u rd S |dkrt jjS |dkrt jjS td| d)NrtnertzzInvalid rounding mode: z0. Supported rounding modes are 'rtne' and 'rtz'.)r   ROUNDING_MODERTNERTZr+   )r   rk  r   r   r   _str_to_rounding_mode  s   z$TritonSemantic._str_to_rounding_modedst_tyc                 C  s   |j }| r||j}||kr|S |j}|j}| s!| r'| ||S |j}|j}||kr?tdt| d t| | 	| j
|j|| j
|S )Nz!Cannot bitcast data-type of size z to data-type of size )rl   r,  r   r|   rs   r   primitive_bitwidthr+   rb  r#   r$   create_bitcastr   r   )r   r   rs  src_ty
src_sca_ty
dst_sca_tysrc_bitsdst_bitsr   r   r   r     s     zTritonSemantic.bitcastfp_downcast_roundingc                 C  sr  |j }|j}|j}||kr|S | r||}| |}d}| r?| r?|j|jk r?|d u r6tjj	}n|tjj	kr>d}n|d urQt
dt| d t| | sY| rr| jjdd usfJ d| jjd |||| dS | rz| s| r| s|r| | j|j|| j||S | r| r| r| s| | |tj|S | o| o|j|jk}|r| | j|j|| j|S | o| o|j|jk }	|	r| | j|j|| j|S | r@| r@|j|jks|j|jkr@|  o|!  }
|! r/|j"| j}| | j#||j"}| $||S | | j%|j|| j|
|S |& r| r|! rg|j"| j}| | j#||j"}| $||S |  r|| | j'|j|| j|S | | j(|j|| j|S | r|& r|! s|  s| | j)|j|| j|S | | j*|j|| j|S |+ r| r|j}|dkr| | j,|j|| j|S |d	kr| $| |tj-| | j.d
tj-S | r|+ r| | j/|j|| j|S |+ r/|+ r/| | j0|j|| j|S J d| d| )NFTz]fp_downcast_rounding should be set only for truncating fp conversions. Source scalar type is z and destination type is convert_custom_typesz0target doesn't provide conversion for this type.)	_semanticr   r   r   zcannot cast z to )1rl   r|   r,  r   rr  rt   rt  r   ro  rp  r+   rb  is_fp8e4b15r$   codegen_fnsgetrR   r#   create_fp_to_fpr   r   rP   rO   rQ   r   r-   rL   create_fp_trunccreate_fp_extrS   r7   r8   r   is_boolr9   r   r  r   is_standard_floatingcreate_fp_to_sicreate_fp_to_uicreate_ui_to_fpcreate_si_to_fprs   create_ptr_to_intre   	get_int64create_int_to_ptrru  )r   r   rs  r{  rv  rw  rx  use_custom_roundingtruncate_fpext_fpsign_extendtyr   bitwidthr   r   r   r     s   


 
 


    
 
(  zTritonSemantic.castc                 C  s\   t jj}|r,|dkrt jj}|S |dkrt jj}|S |dkr$t jj}|S td| d|S )Nz.ca.cgz.cvCache modifier  not supported)r   CACHE_MODIFIERr   CACGCVr+   r   cache_modifiercacher   r   r   _str_to_load_cache_modifier     z*TritonSemantic._str_to_load_cache_modifierc                 C  sp   t jj}|r6|dkrt jj}|S |dkrt jj}|S |dkr$t jj}|S |dkr.t jj}|S td| d|S )Nz.wbr  z.csz.wtr  r  )r   r  r   WBr  CSWTr+   r  r   r   r   _str_to_store_cache_modifier      	z+TritonSemantic._str_to_store_cache_modifierc                 C  sH   t jj}|r"|dkrt jj}|S |dkrt jj}|S td| d|S )N
evict_lastevict_firstzEviction policy r  )r   EVICTION_POLICYNORMAL
EVICT_LASTEVICT_FIRSTr+   )r   eviction_policyevictionr   r   r   _str_to_eviction_policy  s   z&TritonSemantic._str_to_eviction_policyc                 C  sD   d }|r |dkrt jj}|S |dkrt jj}|S td| d|S )NzeronanzPadding option r  )r   PADDING_OPTIONPAD_ZEROPAD_NANr+   )r   padding_optionpaddingr   r   r   _str_to_padding_option  s   z%TritonSemantic._str_to_padding_optionc                 C  sp   t jj}|r6|dkrt jj}|S |dkrt jj}|S |dkr$t jj}|S |dkr.t jj}|S td| d|S )Nacquirereleaseacq_relrelaxedMemory semantic r  )r   MEM_SEMANTICACQUIRE_RELEASEACQUIRERELEASERELAXEDr+   )r   
sem_optionsemr   r   r   _str_to_sem  r  zTritonSemantic._str_to_semc                 C  s\   t jj}|r,|dkrt jj}|S |dkrt jj}|S |dkr$t jj}|S td| d|S )Ngpuctasysr  r  )r   MEM_SYNC_SCOPEGPUCTASYSTEMr+   )r   scope_optionscoper   r   r   _str_to_scope  r  zTritonSemantic._str_to_scopec                 C  s   |rEt |ds
|g}dd |D }|D ]}t|tr(d|  kr't|k s*J  J qt|dks3J t|tt|ksAJ dt|S dS )N__iter__c                 S  "   g | ]}t |tjr|jn|qS r   ra   r-   rj   rI   r7  elemr   r   r   r8       " z?TritonSemantic._canonicalize_boundary_check.<locals>.<listcomp>r   z'Duplicate dimension in `boundary_check`r   )hasattrra   r'   r-  setrT  )r   boundary_checkblock_shapedimr   r   r   _canonicalize_boundary_check  s   
,z+TritonSemantic._canonicalize_boundary_checkc	              
   C  s   |d us|d urt d|jjj}	|	tjksJ d|	 r(|tjjkr(t d|jj}
| 	||

 }| | j|j||||||
S )NK`mask` and `other` arguments cannot be specified for loading block pointers4`tl.int1` should be rewritten in `tl.make_block_ptr`z@Padding option `nan` is not supported for integer block pointers)r+   rl   
element_tyr-   rc   rS   r   r  r  r  rY  r#   r$   create_tensor_pointer_loadr   )r   ptrmaskr   r  r  r  r  is_volatileelt_tyrs  r   r   r   _load_block_pointer  s   
z"TritonSemantic._load_block_pointerc	              
   C  s  |j j std|j   d|d u r|d urtd|s!|r%td|j  s@|r5|j  r5td|r@|j  r@td|j  r_|d urR| ||j  }|d ur_| ||j  }|j j}	|	j}
|
t	j
k}|r}t	j}
t	|
|	j}	| ||	}|d ur| ||
}|j  r|j |
}n|
}|d u r| | j|j||||}n| | j|j|j|r|jnd ||||}|r| |t	j
}|S )NUnsupported ptr type z in `tl.load`z)`other` cannot be provided without `mask`z`padding_option` or `boundary_check` argument is not supported for loading a tensor ofpointers or loading a scalar. Because the compiler does not know the boundary; please use block pointers (defined by `make_block_ptr`) insteadEMask argument cannot be block type if pointer argument is not a blockzFOther argument cannot be block type if pointer argument is not a block)rl   r|   rs   r+   r   r,  r_  rY  r  r-   rc   int8pointer_typeaddress_spacer   r   r#   r$   create_loadr   create_masked_load)r   r  r  r   r  r  r  r  r  ptr_tyr  r  rs  r   r   r   r   _load_legacy  sN   



zTritonSemantic._load_legacyr  r  Optional[TensorTy]r  r   r  rb  r  r  r  c	              
   C  sd   |  |}	| |}
| |}|j r&|jj r&| ||||||	|
|S | ||||||	|
|S r%   )	r  r  r  rl   rs   r  r,  r  r  )r   r  r  r   r  r  r  r  r  r  r  r  r   r   r   load2  s   


zTritonSemantic.loaddesctl.tensor_descriptor_basec                 C  sz   t |tjsJ t|j}t||ksJ d| dt| | j|dd}| j|j|| 	|| 
|}| ||jS )N	expected  offsets, but got Frequire_i64)ra   r-   tensor_descriptor_baser-  r  _convert_to_ir_valuesr$   create_descriptor_loadr   r  r  r#   r  )r   r  offsetsr  r  ndimrm   r   r   r   descriptor_load@  s   
$zTritonSemantic.descriptor_loadc                 C  sR   t |tjsJ t|j}t||ksJ d| dt| |j|jks'J d S )Nr  r  )ra   r-   r  r-  r  r  )r   r  rI   r  r  r   r   r   validate_store_likeK  s   
$z"TritonSemantic.validate_store_likec                 C  s:   |  ||| | j|dd}| | j|j|j|tjS NFr  )r  r  r#   r$   create_descriptor_storer   r-   void)r   r  rI   r  r   r   r   descriptor_storeQ  s   zTritonSemantic.descriptor_storec                 C  sn   |  ||| |jtjtjtjtjtjtjhv sJ d| j	|dd}t
jj}| | j||j|j|tjS NUnsupported dtypeFr  )r  r9   r-   rd   r.   rf   rL   rJ   rK   r  r   DESCRIPTOR_REDUCE_KINDADDr#   r$   create_descriptor_reducer   r  r   r  rI   r  rH   r   r   r   descriptor_atomic_addV  s
   * z$TritonSemantic.descriptor_atomic_addc                 C  s   t j }|jdko|jdkS )NcudaZ   )r
   activeget_current_targetbackendarch)r   targetr   r   r   _has_native_tma]  s   
zTritonSemantic._has_native_tmac                 C  sP   |t jt jt jt jt jt jhv sJ d|t jt jhv r$|  s&J dd S d S )Nr  z-16-bit float types require native tma support)r-   rd   r.   rf   re   rJ   rK   r  )r   r9   r   r   r   $_descriptor_atomic_min_max_supporteda  s
   (z3TritonSemantic._descriptor_atomic_min_max_supportedc                 C  P   |  ||| | |j | j|dd}tjj}| | j	||j
|j
|tjS r  )r  r  r9   r  r   r  MINr#   r$   r  r   r-   r  r  r   r   r   descriptor_atomic_minf  
    z$TritonSemantic.descriptor_atomic_minc                 C  r  r  )r  r  r9   r  r   r  MAXr#   r$   r  r   r-   r  r  r   r   r   descriptor_atomic_maxm  r  z$TritonSemantic.descriptor_atomic_maxc                 C  f   |  ||| |jtjtjtjtjhv sJ d| j|dd}tj	j
}| | j||j|j|tjS r  )r  r9   r-   rd   r.   rf   re   r  r   r  ANDr#   r$   r  r   r  r  r   r   r   descriptor_atomic_andt  
   " z$TritonSemantic.descriptor_atomic_andc                 C  r  r  )r  r9   r-   rd   r.   rf   re   r  r   r  ORr#   r$   r  r   r  r  r   r   r   descriptor_atomic_or{  r  z#TritonSemantic.descriptor_atomic_orc                 C  r  r  )r  r9   r-   rd   r.   rf   re   r  r   r  XORr#   r$   r  r   r  r  r   r   r   descriptor_atomic_xor  r  z$TritonSemantic.descriptor_atomic_xorc           
      C  sF  t |tjsJ |dksJ d|dksJ dt|jdks'J d|j |jd dks6J d|j t|jdksEJ d	|j |jd d
ksTJ d|j |j}d|j d
 }|jd |ksuJ d| d| d|jd  t|j|jd |jd g}| j	|fddd }| j
|j|j||| j
}	| |	|S )N z#cache modifier is not supported yetz$eviction policy is not supported yetr   descriptor must be 2D, but got r   r   *descriptor block must have 1 row, but got x offsets must be 1D, but got    z5descriptor gather must have at least 8 rows, but got r  zdescriptor gather of  must have at least  columns, but got Fr  )ra   r-   r  r-  r  r  r9   rt  r  r  r$   create_descriptor_gatherr   r   r#   )
r   r  	x_offsetsy_offsetr  r  r9   min_colsrl   rm   r   r   r   descriptor_gather  s(   z TritonSemantic.descriptor_gatherc                 C  s  t |tjsJ t|jdksJ d|j |jd dks&J d|j t|jdks5J d|j |jd dksDJ d|j |j}d	|j d }|jd |kseJ d
| d| d|jd  | j	|fddd }| j
|j|j|j| | d tjS )Nr   r  r   r   r  r  r  z6descriptor scatter must have at least 8 rows, but got r  zdescriptor scatter of r   r!  Fr  )ra   r-   r  r-  r  r  shapaer9   rt  r  r$   create_descriptor_scatterr   r#   r  )r   r  rI   r#  r$  r9   r%  r   r   r   descriptor_scatter  s"   z!TritonSemantic.descriptor_scatterc           	   	   C  s   |d urt d|jj }|j s| ||}|j s"J d||j ks7J d| d|j  d|jjj|jjksPJ d|jjj d|jj d|jjj}|tjks^J d| ||}| 	||}| 
| j|j|j|||tjS )	Nr  z-Value argument must be block type or a scalarzBlock shape(z) and value shape(z
) mismatchzBlock element type(z) and value element type(r  )r+   rl   r  rY  r,  r_  r-   rc   r  r   r#   r$   create_tensor_pointer_storer   r  )	r   r  valr  r  r  r  r  r  r   r   r   _store_block_pointer  s"   
2
z#TritonSemantic._store_block_pointerc           	   	   C  s:  |j j std|j   d|rtd|j  s0|j  r%td|r0|j  r0td|j  rK| ||j  }|d urK| ||j  }|j j}|j}|t	j
krgt	j}t	||j}| ||}| ||}|d u r| | j|j|j||t	jS |j j std| | j|j|j|j||t	jS )Nr  z in `tl.store`z`boundary_check` argument is not supported for storing a tensor of pointers or storing a scalar. Because the compiler does not know the boundary; please use block pointers (defined by `make_block_ptr`) insteadzFValue argument cannot be block type if pointer argument is not a blockr  "Mask must have boolean scalar type)rl   r|   rs   r+   r   r,  r_  rY  r  r-   rc   r  r  r  r   r#   r$   create_storer   r  r  create_masked_store)	r   r  r+  r  r  r  r  r  r  r   r   r   _store_legacy  s6   



 zTritonSemantic._store_legacyr+  c           	      C  sp   |  |}| |}|j s|jj rtd|j r.|jj r.| 	||||||S | 
||||||S )N"Cannot store to a constant pointer)r  r  rl   is_constr|   r+   rs   r  r,  r,  r0  )	r   r  r+  r  r  r  r  r  r  r   r   r   store  s   

zTritonSemantic.storecmpr  r  c              	   C  sT   |  |}| |}|jjj}|jdvrtd| | j	|j
|j
|j
|||jS )N)   r  r   z9atomic_cas only supports elements with width {16, 32, 64})r  r  rl   r|   r  rt  r+   r#   r$   create_atomic_casr   )r   r  r4  r+  r  r  r  r   r   r   
atomic_cas  s   



$zTritonSemantic.atomic_casop#Tuple[TensorTy, TensorTy, TensorTy]c                 C  sj  |j j std|j   |j  s|j j rtd|j jj}|tju r4|dkr4td| d |tj	u rE|dkrEtd| d |tj
tjfv sR|jdk r^td| d t| |j  r}|d urp| ||j  }|d ur}| ||j  }| ||j jj}|d u r| jd	}tj}|j  r|j tj}| j|| j|}| ||}|||fS )
Nz)Pointer argument of store instruction is r1  r   atomic_z does not support fp16z does not support bf16r5  z does not support T)rl   r|   rs   r+   r   r2  r  r-   rJ   rK   int16uint16rt  rb  r,  r_  rY  r   r$   rb   rc   r   r.  r   r#   )r   r  r+  r  r8  r  mask_irmask_tyr   r   r   atom_red_typechecking_impl  s2   



z)TritonSemantic.atom_red_typechecking_implc                 C  s@   |j j}tj|dd}| ||}| ||d }| |tjS )NF)r  signedr   )r9   rt  r-   get_int_dtyper   r   r   rc   )r   rm   r  idtypeixsignbitr   r   r   _signbit6  s
   zTritonSemantic._signbitc                 C  s  |  |||d\}}}| |}| |}|jj}| rK| r6| | j	t
jj|j|j|j|||jS | | j	t
jj|j|j|j|||jS |tjtjhvrZtd| |tjkrbtjntj}| ||}| |t|d}	|tjkr}tjntj}
| ||
}| |t|
d}| |}| |}| | j	t
jj|	j|j| ||j|||j}| | j	t
jj|j|j| ||j|||j}| |||}| ||S )Nr   z#atomic_max not supported for dtype r   )r?  r  r  rl   r|   rS   r   r#   r$   create_atomic_rmwr   	ATOMIC_OPr  r   UMAXr-   rL   rN   r<   r.   re   r   r  rd   rf   rE  r   r   UMINwherer   r  r+  r  r  r  sca_tyi_typei_vali_ptrui_typeui_valui_ptrnegpospos_retneg_retr   r   r   r   
atomic_max=  L   



zTritonSemantic.atomic_maxc                 C  s  |  |||d\}}}| |}| |}|jj}| rK| r6| | j	t
jj|j|j|j|||jS | | j	t
jj|j|j|j|||jS |tjtjhvrZtd| |tjkrbtjntj}| ||}| |t|d}	|tjkr}tjntj}
| ||
}| |t|
d}| |}| |}| | j	t
jj|	j|j| ||j|||j}| | j	t
jj|j|j| ||j|||j}| |||}| ||S )Nr   z#atomic_min not supported for dtype r   )r?  r  r  rl   r|   rS   r   r#   r$   rF  r   rG  r  r   rI  r-   rL   rN   r<   r.   re   r   r  rd   rf   rE  r   r   rH  rJ  rK  r   r   r   
atomic_minc  rX  zTritonSemantic.atomic_minc              
   C  sp   |  |||d\}}}| |}| |}|jj}| r!tjjntjj	}| 
| j||j|j|j|||jS )Nr   )r?  r  r  rl   r|   rt   r   rG  FADDr   r#   r$   rF  r   )r   r  r+  r  r  r  rL  r8  r   r   r   
atomic_add  s   

zTritonSemantic.atomic_addc              
   C  T   |  |||d\}}}| |}| |}| | jtjj|j	|j	|j	|||j
S )Nand)r?  r  r  r#   r$   rF  r   rG  r  r   rl   r   r  r+  r  r  r  r   r   r   
atomic_and     

"zTritonSemantic.atomic_andc              
   C  r\  )Nor)r?  r  r  r#   r$   rF  r   rG  r  r   rl   r^  r   r   r   	atomic_or  r`  zTritonSemantic.atomic_orc              
   C  r\  )Nxor)r?  r  r  r#   r$   rF  r   rG  r  r   rl   r^  r   r   r   
atomic_xor  r`  zTritonSemantic.atomic_xorc              
   C  r\  )Nxchg)r?  r  r  r#   r$   rF  r   rG  XCHGr   rl   r^  r   r   r   atomic_xchg  s   

zTritonSemantic.atomic_xchgc                 C  sL   |  | jjjv sJ d| jjj d| | }|dkr d}ttj|S )Nzinput_precision must be one of . Got TF32X3TF32x3)lowerr$   r   allowed_dot_input_precisionsupperr#  r   INPUT_PRECISION)r   input_precisionr   r   r   _str_to_dot_input_precision  s   z*TritonSemantic._str_to_dot_input_precisionaccro  max_num_imprecise_acc	out_dtypec              
   C  s  |j  r
|j  sJ |j r|j rn@|jtjtjtjtjtj	fv s.J d|j |jtjtjtjtjtj	fv sEJ d|j |j|jksWJ d|j d|j |j
 sa|j
 r{d| jjjv rmtd | |tj}| |tj}|d u r| jjj}| |}t|j}t|j}||  krdksn ||  krdksn J d	|j d
|j d|jd j|jd jksJ d|j d|j d|jd j d|jd j d	| jjdd usJ d| jjd |j |j }	|jd j|	d kr|jd j|	d kr|jd j|	d ks)J d|	d  d|	d  d|	d  |j j rF|j jtjks<J d| jd}
tj}n4| rOtd|j j s]|j j rg| jd}
tj	}n|  rr| j!dn| jd}
|}|j jd }|j jd }|j jd }|dkr|j jd nd }t"||r|||gn||g}|d u r| j#|$| j|
}n|j%}|j |ksJ |d u r|j r|j r| jjj&}nd}n|j r|j r||krtd| d| d| '| j(|j%|j%||||S )NzUnsupported lhs dtype zUnsupported rhs dtype z&Both operands must be same dtype. Got r   fp8e4b15zthe use of fp8e4b15 is deprecated on Hopper and later architectures and can cause significant slow down. It will be removed in a future triton releaser      +Both inputs must be either 2D or 3D; (lhs: 	 vs rhs: r   rC  zFirst input shape (z) and second input shape z= are not compatible for matmul (second index of first shape (z0) must be equal to first index of second shape (min_dot_sizez2target doesn't provide lower shape bounds for dot.r   r   zInput shapes should have M >= z, N >= z
 and K >= zonly int8 supported!zhout_dtype=bfloat16 is unsupported. Please use out_dtype=float32/float16 and cast with `.to(tl.bfloat16)`zmax_num_imprecise_acc (z) must be <= K ())rl   r,  r9   rR   r-   r  uint8rJ   rK   rL   r~  r$   r   !deprecated_fp8_dot_operand_dtypeswarningswarnr   default_dot_input_precisionrp  r-  r  rI   r  r  r|   rS   	get_int32r.   rQ   r+   rO   get_fp32rP   get_fp16r  r.  r   r   max_num_imprecise_acc_defaultr#   
create_dot)r   rv   rx   rq  ro  rr  rs  lhs_rankrhs_rankry  r   ret_scalar_tyMNKBr   
acc_handler   r   r   dot  s   

$



F0, 
" 

"zTritonSemantic.dotfloat_formatc                 C  s.   t tj| d }|d u rtd| d|S )NzInvalid float format: r\   )r#  r   ScaleDotElemTypeTYrm  r+   )r   r  ty_enumr   r   r   _str_to_fp_type	  s   zTritonSemantic._str_to_fp_typec                 C  s   t jt jt jt jd|}|du r-|dksJ d| |jt jks+J d|j |S |j|kr4|S t jt jt jt jd| }|j|ksQJ d| d|j | 	||S )z
        If float_format is subbyte, make sure it's packed as uint8 and return it.
        Otherwise, return a tensor (perhaps bitcasting) of the specified float format.
        )e5m2e4m3bf16fp16Ne2m1z)Internal Error: Unexpected float format: z)e2m1 format must be packed as uint8. Got zUnexpected dtype for rh  )
r-   float8e5
float8e4nvrK   rJ   r  r9   rz  r<  r   )r   r+  r  	triton_tyunsigned_tyr   r   r   _bitcast_to_fp_type  s   
 z"TritonSemantic._bitcast_to_fp_type	lhs_scale
lhs_format	rhs_scale
rhs_formatTensorTy | None	fast_math
lhs_k_pack
rhs_k_packc           !      C  s  |j  r
|j  sJ t|j}t|j}||  kr dks9n ||  kr+dks9n J d|j d|j d|j}|j}| |}| |}h d}||v sXJ d| ||v scJ d| |d u pqt|tjoq|jd u }|d u pt|tjo|jd u }| 	||}| 	||}|	s|d	ksJ d
|
s|d	ksJ d
|j jdd  \}}|j jdd  \}}|d	krdnd}|d	krdnd}|	r|| n|}|
r|| n|}||ksJ d|j d|j d|dkr|j jd nd }|	s|| }|
s|| }t
||r	|||gn||g}| jd}|d u r%| j|| j|}n|j}|j |ks0J |r5d n|j}|r=d n|j} | | j|j| ||j||||	|
|
|S )Nr   ru  rv  rw  r   >   r  r  r  r  r  zNYI: lhs_format zNYI: rhs_format r  zBonly mxfp4 inputs can be packed along a dimension different than Krx  r   zCReduction dimension should pack the same number of elements; (lhs: r   )rl   r,  r-  r  rI   r  ra   r-   rj   r  r  r$   r  r.  r   r   r#   create_dot_scaled)!r   rv   r  r  rx   r  r  rq  r  r  r  rs  r  r  lhs_format_enumrhs_format_enumallowed_formatsrhs_scale_is_nonelhs_scale_is_noner  K_LHSK_RHSr  PACKED_APACKED_BPACKED_A_DIMPACKED_B_DIMr  r   r   r  rhs_scale_handlelhs_scale_handler   r   r   
dot_scaled!  sV   

F

" 

zTritonSemantic.dot_scaled	conditionc                 C  s   |j tjkrtd|j   | |tj}| ||dd\}}|j r6| 	||\}}| 	||\}}n| 	||\}}|j}| 
| j|j|j|j|S )Nzgtl.where with a non-boolean condition is deprecated and will error out in a future triton release. Got T)r9   r-   rc   r|  r}  r   r   rl   r,  r   r#   r$   create_selectr   )r   r  rm   r   rg  r   r   r   r   rJ  U  s   

zTritonSemantic.wherec                 C  s"   |r	t ||}n|}| ||S r%   )r-   r  r#   )r   rm   rT   rh  res_tyr   r   r   wrap_tensori  s   zTritonSemantic.wrap_tensorinputsSequence[TensorTy]Tuple[TensorTy, ...]c                   s    d u rt fddD d d jjt} |k s'J d| d fddtD tfddD sAJ d	jd
d D  |  sWJ t fddt	tD S )Nc                 3  s&    | ]} j ||jjgd dV  qdS )TrD  N)r3  r&  rI   r7  tr   r   r   rR  s  s   $ z+TritonSemantic.reduction.<locals>.<genexpr>r   z&reduction axis must be < inputs rank (r   c                   s   g | ]
\}}| kr|qS r   r   )r7  r]  r2  )r&   r   r   r8  y  s    z,TritonSemantic.reduction.<locals>.<listcomp>c                 3  s    | ]	}|j j kV  qd S r%   )rl   r  r  r:  r   r   rR  z  s    z-all reduction inputs must have the same shapec                 S     g | ]}|j qS r   r   r  r   r   r   r8  |      c                 3  s,    | ]} | | jjV  qd S r%   r  
get_resultrl   r|   r7  r]  )r  	reduce_oprh  r   r   r   rR    s    
)
tuplerl   r  r-  rZ  allr$   create_reduceverifyr  )r   r  r&   region_builder_fnrankr   )r&   r  r  rh  r   r  r   	reductionq  s   
zTritonSemantic.reductionreversec                   s    d j jt}| |  kr|k s!n J d| d| d|dk r)||7 } D ]}|j jks7J dq+jdd  D |||  sOJ t fdd	tt D S )
Nr   z
scan axis z must be < inputs rank (r   z(all scan inputs must have the same shapec                 S  r  r   r  r  r   r   r   r8    r  z3TritonSemantic.associative_scan.<locals>.<listcomp>c                 3  s,    | ]} | | jjV  qd S r%   r  r  r  scan_opr   r  r   r   rR    s   * z2TritonSemantic.associative_scan.<locals>.<genexpr>)rl   r  r-  r$   create_scanr  r  r  )r   r  r&   r  r  r  r  r   r  r   associative_scan  s   .$zTritonSemantic.associative_scansrcindexc                 C  s   |j  s	J dt|jj}t|jj|ksJ d| |  kr&|k s2n J d| d| d|dk r:||7 }t|D ]}||krEq>|jj| |jj| ksYJ d| dq>| j|j|j|}| 	||jj
|jjS )	Nzindex must be an integer tensorz0source and index tensors must have the same rankzgather axis z must be < source rank (r   r   z
index dim z( must match the corresponding source dim)r9   rS   r-  rl   r  r  r$   create_gatherr   r  r|   )r   r  r  r&   r  rQ  gatherr   r   r   r    s   .*zTritonSemantic.gathernum_binsc                 C  s~   t |jdksJ d|j sJ d|d ur,| ||j}|jj s)td|j	}| 
| j|j	||ttj|gS )Nr   z histogram only supports 1D inputz%histogram only supports integer inputr-  )r-  r  r9   rS   r_  rl   r|   r  r+   r   r#   r$   create_histogramr-   r  r.   )r   r   r  r  r   r   r   	histogram  s   zTritonSemantic.histogramr`  c                 C  s@   t dt|jt|krtd|jdt||j  |S )Nr   zAShape of input to multiple_of does not match the length of valuesztt.divisibility)	r   r-  r  r+   r   set_attrr   	make_attrget_contextr   rm   r`  r   r   r   multiple_of  s   zTritonSemantic.multiple_ofc                 C  :   t |jt |krtd|jdt||j  |S )NzDShape of input to max_contiguous does not match the length of valuesztt.contiguityr-  r  r+   r   r  r   r  r  r  r   r   r   max_contiguous     zTritonSemantic.max_contiguousc                 C  r  )NzCShape of input to max_constancy does not match the length of valuesztt.constancyr  r  r   r   r   max_constancy  r  zTritonSemantic.max_constancyc                 C  s   |  | j tjS r%   )r#   r$   create_barrierr-   r  r  r   r   r   debug_barrier  r+  zTritonSemantic.debug_barrierprefixargsList[TensorTy]hexc                 C  s   | ds|r|d7 }| ds|r|d d d }t|dkr)|ds)d| }dd |D }dd |D }| | j||||tjS )N rX  rC  r   c                 S  r  r   r  r7  argr   r   r   r8    r  z/TritonSemantic.device_print.<locals>.<listcomp>c                 S  s   g | ]}|j  qS r   )r9   r   r  r   r   r   r8    r9  )endswithr-  
startswithr#   r$   create_printr-   r  )r   r  r  r  new_args	is_signedr   r   r   device_print  s   zTritonSemantic.device_printr   r   c                 C  s(   | j jjsd S | | j |j|tjS r%   )r$   r   debugr#   create_assertr   r-   r  )r   r   r   r   r   r   r     s   
zTritonSemantic.device_assertc                 C  s   |  | j|jtjS r%   )r#   r$   create_assumer   r-   r  )r   r   r   r   r   assume  s   zTritonSemantic.assumec                 C  s>  t |tr
t|}t |tjrWt |jtr| j|jS |r;d|j  kr*dk s4n J d|j d| j|jS d|j  krFdk sPn J d|j d| j	|jS t |tj
r|jjdksgJ d	|j spJ d
|jtjkr|r| j|j| j |j S |jtjkr|sJ d|jS J dt| )NrZ   r[   z@Block pointers only support 64 bit `shape/strides`, got a value z which is out of the rangerX   rY   zFBlock pointers only support 32 bit `offsets/block_shape`, got a value r   z*Expected a scalar in shape/strides/offsetsz8Expected an integer scalar type in shape/strides/offsetsFzzBlock pointers only support 32 bit `offsets/block_shape`, add a `.to(tl.int32)` or use regular indexing for 64 bit supportz3Unsupported element type in shape/strides/offsets: )ra   r'   r-   rj   rI   rC   r$   rb   r  r  r#   r&  r9   rS   re   r   r   get_int64_tyr   r.   rl   )r   r  r  r   r   r   _convert_elem_to_ir_value  s2   



z(TritonSemantic._convert_elem_to_ir_valuec                   s,   t |dr fdd|D S | gS )Nr  c                   s   g | ]} | qS r   )r  r  r  r   r   r   r8    s    z8TritonSemantic._convert_to_ir_values.<locals>.<listcomp>)r  r  )r   	list_liker  r   r  r   r    s   
z$TritonSemantic._convert_to_ir_valuesbasec              	     s:  |  |}|  |}| j |dd}|j r|jj r td|jjtjkr4| |t	tj
|jj}t ds< g dd  D  tdd  D sPJ d	t|dsX|g}d
d |D }t|ttt|ksoJ dt fdd||||fD sJ d| j|j||| |}| |t	t|jj S )NFr  zMExpected `base` to be a pointer type (but not a block pointer type or others)r  c                 S  r  r   r  r  r   r   r   r8    r  z1TritonSemantic.make_block_ptr.<locals>.<listcomp>c                 s  s2    | ]}t |tod |  kodk n  V  qdS )rX   rY   N)ra   r'   r  r   r   r   rR    s   0 z0TritonSemantic.make_block_ptr.<locals>.<genexpr>zGExpected a list of constant integers (`int32_t` range) in `block_shape`c                 S  r  r   r  r  r   r   r   r8  #  r  z<Expected a permutation of (0, 1, ..., len(order)-1) in orderc                 3  s     | ]}t  t |kV  qd S r%   )r-  )r7  r  r  r   r   rR  '  s    zBExpected shape/strides/offsets/block_shape to have the same length)r  rl   rs   r  r,  r+   r-   rc   r   r  r  r  r  r  rT  rU  r  r-  r$   create_make_block_ptrr   r#   r  )r   r  r  stridesr  r  orderr   r   r  r   make_block_ptr
  s,   



  zTritonSemantic.make_block_ptrc                 C  s(   | j |dd}| | j|j||jS r  )r  r#   r$   create_advancer   rl   )r   r  r  r   r   r   advance0  s   zTritonSemantic.advancer  r  List[tl.constexpr]tl.tensor_descriptorc                   s  t |}d|  krdksn td| dt ||kr)td| dt | t ||kr;td| dt | t|jtjsDJ |jjjd	 }t|d
 }|| dk rhtd| d| d||  dt|d
 |d
< |d
 dkrtd|d
   fdd|D } fdd|D }t	|}t|j
tjsJ t|j
j|}|j}	|j
j }
 j|	dd |D dd |D ||
}t||||S )Nr      z Expected 1 <= ndim <= 5 but got z dimensionsz	Expected z strides but got zExpected block_shape to have z dimensions but got r  rC  r5  zRDescriptor block shape must have at least 16 bytes in the last dimension, but got z * z = z bytesz-Tensor descriptor last dim must be 1 but got c                      g | ]	}  |tjqS r   )r'  r-   r.   r6  r  r   r   r8  Q      z9TritonSemantic.make_tensor_descriptor.<locals>.<listcomp>c                   r  r   )r'  r-   re   r6  r  r   r   r8  R  r  c                 S  r  r   r  r7  r2  r   r   r   r8  \  r  c                 S  r  r   r  r  r   r   r   r8  ]  r  )r-  r+   ra   r9   r-   r  r  rt  r5  _unwrap_shaperl   r  r   r   r$   create_make_tensor_descriptortensor_descriptor)r   r  r  r  r  r  	elem_sizecontig_dim_sizerl   base_handleis_signed_intr   r   r  r   make_tensor_descriptor7  s8   
z%TritonSemantic.make_tensor_descriptor)r&   r'   r(   r   )r4   r5   r6   r5   r(   r5   )r4   r5   rB   rC   r6   r5   rD   rC   rE   rC   r(   r5   )T)rW   rC   )r   r5   r   r5   rq   rC   r(   rr   )FFTF)rv   rw   rx   rw   r(   ry   )rv   r   rx   r   r   r   )r   rw   r   rw   r   rC   r(   r   )r   rw   r   rw   r(   r   )r   rw   r   rw   r   rC   r(   r   )rm   r   r   r   r   r   )rm   r   r   r   r   r   r   r   )r   r   r   r   r(   ry   )r   r   r   r   r(   r   )r   r   )r   r   r(   r   )r   r   r(   r   )r  r'   r  r'   r   r   r(   r   )r9   r5   r(   r   )r  r(  r9   r5   r(   r   )rI   r   r  r(  r(   r   )r   r   r/  r(  r0  rC   r(   r   )r   r   r&   r'   r(   r   )rv   r   rx   r   r0  rC   r(   r   )rA  r   rB  r   r(   r   )rA  r   r(   ry   )r   r   rN  rO  r(   r   )r   r   r  rO  r(   r   )rv   r   rx   r   r(   r   )rk  rl  )r   r   rs  r5   r(   r   r%   )r   r   rs  r5   r{  rl  r(   r   )r  r   r  r  r   r  r  r   r  rb  r  rb  r  rb  r  rC   r(   r   )r  r  r  rb  r  rb  r(   r   )r  r  rI   r   r(   rr   )r  r  rI   r   r(   r   )r  rb  r  rb  r(   r   )rI   r   r(   r   )r  r   r+  r   r  r  r  rb  r  rb  r(   r   )r  r   r4  r   r+  r   r  rb  r  rb  r(   r   )
r  r   r+  r   r  r   r8  rb  r(   r9  )rm   r   r(   r   )r  r   r+  r   r  r   r  rb  r  rb  r(   r   )rv   r   rx   r   rq  r   ro  rl  rr  r'   rs  r5   r(   r   )r  rb  )r+  r   r  rb  )rv   r   r  r   r  rb  rx   r   r  r  r  rb  rq  r  r  rC   r  rC   r  rC   rs  r5   r(   r   )r  r   rm   r   r   r   r(   r   )r  r  r&   r'   r(   r  )r  r  r&   r'   r  rC   r(   r  )r  r   r  r   r&   r'   r(   r   )r   r   r  r'   r  r  r(   r   )rm   r   r`  r(  r(   r   )r(   r   )r  rb  r  r  r  rC   r(   r   )r   r   r   rb  r(   r   )r  r   r(   r   )
r  r   r  r  r  r  r  r  r(   r   )yr   r   r    r-   r#   __annotations__langr   r0   r3   rA   rV   rk   ru   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r   r  r   r  r  r!  rg   r'  r*  r)  r3  r=  r@  rI  rM  rW  r_  r   rr  r   r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r&  r)  r,  r0  r3  r7  r?  rE  rW  rY  r[  r_  rb  rd  rg  rp  r  r  r  r  rJ  r  r  r  r  r  r  r  r  r  r  r   r  r  r  r  r  r  r   r   r   r   r"      s   
 	2)%		
	8	o<,&&	N4&r"   )
__future__r   r|  typingr   r   r   r   r   r   r	   rz   triton.runtimer
   _C.libtritonr   r  r   r-   r   r   	Exceptionr   r"   r   r   r   r   <module>   s    $	