o
    h                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
Z
d dlmZ d dlmZmZ d dlmZmZmZ g dZg dZee Zg dZed	g Zee Zed	g Zd
dgZdge dg e d	g Zeeeh d Zdd Z dd Z!dd Z"dd Z#dd Z$dd Z%dd Z&dd Z'dd  Z(d!d" Z)d#d$ Z*d%d& Z+dAd'ee fd(d)Z,dBd*ej-d+eeej.f fd,d-Z/d*e0d+ej1fd.d/Z2d+e0fd0d1Z3d2d3 Z4dCd5d6Z5dCd7d8Z6e
j7j8e5  e6 d9Z9d:e:d;e:fd<d=Z;d>eej.ej<j=jf d+ej.fd?d@Z>dS )D    N)knobs)RandomState)OptionalUnion)TensorWrapperreinterprettype_canonicalisation_dict)int8int16int32int64)uint8uint16uint32uint64)float16float32float64bfloat16float8_e4m3fnfloat8_e5m2boolr   >   r   r   r   c                   C   s   t jdddkS )NTRITON_INTERPRET01)osenvironget r   r   S/var/www/html/scripts/venv/lib/python3.10/site-packages/triton/_internal_testing.pyis_interpreter      r    c                   C   s   t  rd S tjjj S N)r    tritonruntimedriveractiveget_current_targetr   r   r   r   r'      s   r'   c                  C      t  } | d u r	dS | jdkS )NFcudar'   backendtargetr   r   r   is_cuda$      r.   c                   C   s   t  otj d dkS )Nr   	   )r.   torchr)   get_device_capabilityr   r   r   r   	is_hopper)   s   r3   c                  C   r(   )NFhipr*   r,   r   r   r   is_hip-   r/   r5   c                  C   "   t  } | d uo| jdko| jdkS )Nr4   gfx90ar'   r+   archr,   r   r   r   is_hip_cdna22      r:   c                  C   r6   )Nr4   gfx942r8   r,   r   r   r   is_hip_cdna37   r;   r=   c                  C   r6   )Nr4   gfx950r8   r,   r   r   r   is_hip_cdna4<   r;   r?   c                  C   s,   t  } t| j | d uo| jdkod| jv S )Nr4   gfx12)r'   printr9   r+   r,   r   r   r   is_hip_gfx12A   s   
rB   c                   C   s   t  pt pt S r"   )r:   r=   r?   r   r   r   r   is_hip_cdnaG   r!   rC   c                  C   r(   )NFxpur*   r,   r   r   r   is_xpuK   r/   rE   c                  C   s   t  } | d u r	dS t| jS )N )r'   strr9   r,   r   r   r   get_archP   r/   rH   rsc                 C   s8  t | tr| f} |du rtdd}|tt v rOttt|}|du r&|jnt	||j}|du r3|j	nt||j	}tt|}|j
||| |d}d||dk< |S |rad|v ra|j
dd	| tjd}|S |tv ro|dd| |S |d
kr|dd| ddtd@ dS |dv r|dd| dkS td| )zp
    Override `rs` if you're calling this function twice and don't want the same
    result for both calls.
    N   )seed)dtype   r   float8   (   r   r   r   l      )r   int1bool_g        zUnknown dtype )
isinstanceintr   
int_dtypesuint_dtypesnpiinfogetattrminmaxrandintr	   float_dtypesnormalastypeviewr   RuntimeError)shape	dtype_strrI   lowhighrX   rL   xr   r   r   numpy_randomU   s,   


*rg   rf   returnc                 C   s   | j j}|tv r"|d}| tt|}ttj	||dtt
|S |r5d|v r5ttj	| |dtt
|S |dkrF|dkrFtj	| |d S tj	| |dS )z
    Note: We need dst_type because the type of x can be different from dst_type.
          For example: x is of type `float32`, dst_type is `bfloat16`.
          If dst_type is None, we infer dst_type from x.
    u)devicerN   r   r   )rL   namerV   lstripr_   rY   rW   r   r1   tensortlr   )rf   rj   dst_typetsigned_type_namex_signedr   r   r   	to_tritons   s   
rs   c                 C   s   t t|  S r"   )rn   	str_to_tyr   rf   r   r   r   str_to_triton_dtype   s   rv   c                 C   sL   t | tjjr
| jS t | tjrtdt| }|	dS t
dt|  )Nz^torch\.(\w+)$rM   znot a triton or torch dtype: )rS   r#   languagerL   rk   r1   rematchrG   group	TypeErrortype)rL   mr   r   r   torch_dtype_name   s   
r~   c                 C   sl   t | tr| j  ttt| j	S t | t
jr/| j	t
ju r)|    S |   S td|  )Nz Not a triton-compatible tensor: )rS   r   basecpunumpyr_   rY   rW   r~   rL   r1   Tensorr   float
ValueErrorru   r   r   r   to_numpy   s   
 r   Fc                 C   sl   t  rdS t s
dS tjjj}| rdnd}ttt|	d}t
|dks)J |tj d dko5||kS )	NTF)   r   )r      .   r   r0   )r    r.   r   nvidiaptxasversiontuplemaprT   splitlenr1   r)   r2   )
byval_onlycuda_versionmin_cuda_versioncuda_version_tupler   r   r   supports_tma   s   
r   c                 C   s   | rdS dS )NzURequires __grid_constant__ TMA support (NVIDIA Hopper or higher, CUDA 12.0 or higher)zLRequires advanced TMA support (NVIDIA Hopper or higher, CUDA 12.3 or higher)r   )r   r   r   r   tma_skip_msg   s   r   )reasonsizealignc                 C   s   t j| t jddS )Nr)   )rL   rj   )r1   emptyr	   )r   r   _r   r   r   default_alloc_fn   r!   r   rp   c                 C   s   t | tjjjr| jS | S r"   )rS   r#   r$   jitr   r   )rp   r   r   r   unwrap_tensor   s   r   )NNNr"   )F)?r   rx   r   rW   r1   r#   triton.languagerw   rn   r   pytestnumpy.randomr   typingr   r   triton.runtime.jitr   r   r   rU   rV   integral_dtypesr]   float_dtypes_with_bfloat16dtypesdtypes_with_bfloat16torch_float8_dtypestorch_dtypessortedset
tma_dtypesr    r'   r.   r3   r5   r:   r=   r?   rB   rC   rE   rH   rg   ndarrayr   rs   rG   rL   rv   r~   r   r   r   markskipifrequires_tmarT   r   r$   r   r   r   r   r   r   <module>   sV    

 

(