o
    hJ6                     @   s   d dl mZmZmZmZmZ d dlmZ ddlm	Z
 ddlmZ d dlmZ d dlmZmZ edZefd	ed
eg ef fddZG dd dee ZdS )    )SequenceListTypeVarTupleCallable)TritonSemantic   )_core)SliceLayout)GluonOpBuilder)flatten_values_to_irunflatten_ir_valuesTensorTycondmsg_fnc                 C   s   | s|| d S N )r   r   categoryr   r   g/var/www/html/scripts/venv/lib/python3.10/site-packages/triton/experimental/gluon/language/_semantic.py_check   s   
r   c                       s  e Zd ZU ejZeZeed< defddZdd Z	de
e de
e fdd	Zd
ededefddZdededef fddZdedeeef f fddZd
edee def fddZd
edee defddZdededef fddZ fdd Zd
ed!e
e d"ef fd#d$Zd%d& Zd'd( Zd)d* Zd+d, Zd-d. Zd/d0 Zd1d2 Zd3d4 Zd5d6 Z d7d8 Z!d9d: Z"d;d< Z#d=d> Z$d?d@ Z%e&dAdB Z'dCe(e dedeedDf fdEdFZ)dGe(e dHe(e fdIdJZ*  Z+S )KGluonSemanticbuilderc                 C   s
   || _ d S r   )r   )selfr   r   r   r   __init__   s   
zGluonSemantic.__init__c                 C   s,   t |jj|j| j|j}| |j|S r   )	ttgldistributed_typetypescalarshaper   get_gluon_layout_from_tensorhandletensor)r   r!   tyr   r   r   _wrap_tensor_infer_layout   s   z'GluonSemantic._wrap_tensor_infer_layout	lhs_shape	rhs_shapec                 C   s   t |t |krtd| d| g }t|D ]3\}}|| }|dkr*|| q|dks2||kr8|| qtdt| d t| d t| |S )N!Cannot broadcast, rank mismatch: , r   z?Cannot make_shape_compatible: incompatible dimensions at index : z and )len
ValueError	enumerateappendstr)r   r$   r%   	ret_shapeileftrightr   r   r   _broadcast_shapes   s*   zGluonSemantic._broadcast_shapesinputaxisreturnc                    s   dd j D }| d  dk r tj 7  ttjtjfdd jjttt	fdd tj
 k fdd tjj|j}| jj || j}| ||S )	Nc                 S   s   g | ]}t |qS r   )r   _unwrap_if_constexpr.0xr   r   r   
<listcomp>/       z-GluonSemantic.expand_dims.<locals>.<listcomp>r   r   c                         d j S Nz=expected expand_dims input to be a distributed_type but got: r   r   r3   r   r   <lambda>6       z+GluonSemantic.expand_dims.<locals>.<lambda>c                      
   d  S )Nz;expected expand_dims input to have a SliceLayout, but got: r   r   )layoutr   r   r@   9      
 c                      s   d  dj  S )Nz7expected expand_dims input layout to be sliced in axis z	 but got )dimr   )r4   rC   r   r   r@   ;       )r   insertr)   r   
isinstancer   r   r   rC   r
   rE   r   parentr   create_expand_dimsr    to_irr!   )r   r3   r4   	dst_shaperet_tyr    r   )r4   r3   rC   r   expand_dims.   s"   



zGluonSemantic.expand_dimsabc                    s8   |  ||\}}t|jg kd t ||}| |S )NzCannot join scalars in gluon)broadcast_impl_valuer   r   superjoinr#   )r   rO   rP   value	__class__r   r   rS   A   s   
zGluonSemantic.joinc                    s$   t  |\}}| || |fS r   )rR   splitr#   )r   rO   lhsrhsrU   r   r   rW   G   s   zGluonSemantic.splitdimsc                    s   t  ||}| |S r   )rR   permuter#   )r   r3   rZ   rT   rU   r   r   r[   K   s   
zGluonSemantic.permuter   c                    s   t t jtj fdd  j t ttkfdd kr) S tD ]#\}}| |krP|dkrPtd|  d| d| d d	 
q-t jj	 jj
}| j j|| j}| ||S )
Nc                      r<   r=   r>   r   r?   r   r   r@   Q   rA   z4GluonSemantic.broadcast_impl_shape.<locals>.<lambda>c                         d d  S )Nr&   r'   r   r   )r   	src_shaper   r   r@   S       r   z3Cannot broadcast, the expanded size of the tensor (z ) must match the existing size (z) at non-singleton dimension r(   r'   )r   rH   r   r   r   get_block_shapesr)   r+   r*   r   rC   r   create_broadcastr    rK   r!   )r   r3   r   r/   itemrM   r    r   )r3   r   r]   r   broadcast_impl_shapeO   s,   

 z"GluonSemantic.broadcast_impl_shaperX   rY   c                    s   |j  |j   r st ||S tt tj fdd tttjfdd   } }| 	||} j
j
krOtd j
 dj
 | ||}| ||}||fS )Nc                      
   d S )Nz@expected broadcast left input to be a distributed_type but got: r   r   )lhs_tyr   r   r@   g   rD   z4GluonSemantic.broadcast_impl_value.<locals>.<lambda>c                      rc   )NzAexpected broadcast right input to be a distributed_type but got: r   r   )rhs_tyr   r   r@   i   rD   zLayout mismatch in broadcast: z vs )r   is_blockrR   rQ   r   rH   r   r   r_   r2   rC   r*   rb   )r   rX   rY   r$   r%   r.   rU   )rd   re   r   rQ   _   s$   

z"GluonSemantic.broadcast_impl_valuec                    s,   || g}t t j||}t j|||dS )N)rM   )r   r   int32rR   arange)r   startendrC   r   rM   rU   r   r   rh   u   s   
zGluonSemantic.arangerL   can_reorderc                    s&   t | d t |||}| |S )Nz%can_reorder is not supported in gluon)r   rR   reshaper#   )r   r3   rL   rk   rT   rU   r   r   rl   z   s   
zGluonSemantic.reshapec                 C   s4   t |j||}| j|| j|j}t ||S r   )r   r   dtyper   create_splatrK   r    r!   )r   rT   r   rC   rM   r    r   r   r   splat   s   zGluonSemantic.splatc                 C   s   |  ||}| |||S r   )make_scalarro   )r   r   rT   rm   rC   r   r   r   r   full   s   zGluonSemantic.fullc                    sV   |j  tt tj fdd t j j|}| j|	| j|j
}t||S )Nc                      rc   )Nz@expected convert_layout input to be a distributed_type but got: r   r   r"   r   r   r@      rD   z.GluonSemantic.convert_layout.<locals>.<lambda>)r   r   rH   r   r   
element_tyr   r   create_convert_layoutrK   r    r!   )r   rT   rC   rM   r    r   rr   r   convert_layout   s   
zGluonSemantic.convert_layoutc                 C   sX   t ||||}|d ur| j|| j|j}n
| j|| j}t |||||S r   )r   shared_memory_descriptor_typer   create_local_allocrK   r    shared_memory_descriptor)r   rs   r   rC   rT   r"   r    r   r   r   allocate_shared   s
   zGluonSemantic.allocate_sharedc                 C   s6   t |j|j|}| j|| j|j}t ||S r   )	r   r   rm   r   r   create_local_loadrK   r    r!   )r   mem_descrC   rM   r    r   r   r   shared_load   s   zGluonSemantic.shared_loadc                 C   s   | j |j|j d S r   )r   create_local_storer    )r   r{   rT   r   r   r   shared_store   s   zGluonSemantic.shared_storec                 C   s   | j |j d S r   )r   create_local_deallocr    )r   r{   r   r   r   shared_dealloc   s   zGluonSemantic.shared_deallocc                 C   sL   |j }t|j|||jj}| j}||||j	|}tj
|fi |jS r   )rC   r   rv   rm   r   alloc_shaper   create_memdesc_subviewrK   r    rx   __dict__)r   r{   offsetsr   rC   r"   r   r    r   r   r   _memdesc_subview   s
   zGluonSemantic._memdesc_subviewc                 C   sD   | j dg|j }| |j||< t|j}|||< | |||S )Nr   )r   	get_int32rank	to_tensorr    listr   r   )r   r{   ri   lengthrE   r   r   r   r   r   memdesc_slice   s
   
zGluonSemantic.memdesc_slicec                 C   s@   |j dd  }| jdg|j }| |j|d< | |||S )Nr   r   )r   r   r   r   r   r    r   )r   r{   indexr   r   r   r   r   memdesc_index   s   zGluonSemantic.memdesc_indexc                    s   t |t jksJ dj dt | dfdd|D }jj  d t  j  }| fdd|D 7 }| jj|}| j|}t	j
|j|||dS )Nzsource rank (z) and order length (z) must matchc                    s   g | ]} j | qS r   r   r8   r/   )r{   r   r   r:      r;   z/GluonSemantic.memdesc_trans.<locals>.<listcomp>c                    s&   g | ]} t  j d  | qS r   )r)   r   r   r   r{   r   r   r:      s   & )rs   r   r   rC   )r)   r   r   r   r   r   create_memdesc_transr    get_gluon_layout_from_memdescr   rx   rm   )r   r{   orderr   new_alloc_shaper    rC   r   r   r   memdesc_trans   s   zGluonSemantic.memdesc_transc                 C   sB   t |j|||jj}| j|| j|j}t j	|fi |j
S r   )r   rv   rm   r   r   r   create_memdesc_reshaperK   r    rx   r   )r   r{   r   rC   r"   r    r   r   r   memdesc_reshape   s   zGluonSemantic.memdesc_reshapec                 C   s<   t ||||}| j|| j|j}t j|fi |jS r   )r   rv   r   create_memdesc_reinterpretrK   r    rx   r   )r   r{   rm   r   rC   r"   r    r   r   r   memdesc_reinterpret   s   z!GluonSemantic.memdesc_reinterpretc                 C   s$   |r
t |||}n|}| ||S r   )r   r   r!   )r   r9   	scalar_tyr.   rC   res_tyr   r   r   wrap_tensor   s   zGluonSemantic.wrap_tensorc                    sl   | D ]t tjtjfdd qdd | D d  t t fdddd  D fd	d d S )
Nc                      r<   )Nz#expected distributed_type but got: r>   r   )r9   r   r   r@      rA   z2GluonSemantic._check_same_layout.<locals>.<lambda>c                 S   s   g | ]}|j jqS r   )r   rC   r7   r   r   r   r:          z4GluonSemantic._check_same_layout.<locals>.<listcomp>r   c                 3   s    | ]}| kV  qd S r   r   )r8   l)l0r   r   	<genexpr>   s    z3GluonSemantic._check_same_layout.<locals>.<genexpr>r   c                      rB   )Nz3Expected inputs to have matching layouts, but got: r   r   )layoutsr   r   r@      rD   )r   rH   r   r   r   all)xsr   )r   r   r9   r   _check_same_layout   s   
z GluonSemantic._check_same_layoutinputs.c                    s   t  d udd  d jjtt d   kok n   fdd   fddtD t d jjtfddD sOJ d	j	
d
d D  |  seJ tfddttD S )Nc                   S   s   dS )Nz*All-reduce is not yet implemented in gluonr   r   r   r   r   r@      s    z)GluonSemantic.reduction.<locals>.<lambda>r   c                      r\   )Nz/expected reduction axis to be in the range [0, z
) but got r   r   )r4   r   r   r   r@      r^   c                    s   g | ]
\}}| kr|qS r   r   )r8   r/   s)r4   r   r   r:      s    z+GluonSemantic.reduction.<locals>.<listcomp>c                 3   s    | ]	}|j j kV  qd S r   )r   r   r8   tr   r   r   r      s    z*GluonSemantic.reduction.<locals>.<genexpr>z-all reduction inputs must have the same shapec                 S      g | ]}|j qS r   )r    r   r   r   r   r:      rF   c                 3   s.    | ]} | | jjV  qd S r   )r   
get_resultr   r   r   )r   	reduce_op
ret_layoutr.   r   r   r   r      s
    
)r   r   r   r)   r   r+   r
   rC   r   r   create_reduceverifytuplerange)r   r   r4   region_builder_fnr   )r4   r   r   r   r   r.   r   r   r   	reduction   s   (

zGluonSemantic.reductionworker_num_warpsworker_num_regsc                    s  t |}|t |ksJ d| dt | d|t |ks*J d| dt | d| j}| }	| }
||
 |j||i d}g }|d urLt|}|| dd |D }||	 t|}|	|||
 |
 | | g  ||}dd |D }t|D ]0}||||  fd	dtt |D }t|d
d |D }|j|| |i d |  q|  fddtt |D }|d u rd S tt|dd |D S )Nzwarp specialize got z partitions but z warp countsz register counts)kwargsc                 S      g | ]}|  qS r   get_typer8   rr   r   r   r:     r   z1GluonSemantic.warp_specialize.<locals>.<listcomp>c                 S   r   r   r   r8   argr   r   r   r:     r   c                       g | ]}  |qS r   )get_argument)r8   j)blockr   r   r:     r;   c                 S   r   r   r>   r   r   r   r   r:     rF   c                    r   r   )r   r   )ws_opr   r   r:     r;   c                 S   r   r   r>   r   r   r   r   r:     rF   )r)   r   get_insertion_point	new_blockset_insertion_point_to_startcall_JitFunctionr   create_warp_yieldrestore_insertion_pointcreate_warp_specializeget_default_region	push_backset_requested_registerscreate_block_with_parentget_partition_op_holder!create_warp_specialize_partitionsr   
get_regionr   create_warp_returnset_insertion_point_afterget_operationr   )r   argsdefault_partitionworker_partitionsr   r   	generatornum_partitionsr   	insert_ptdefault_blockdefault_resultsmlir_resultsresult_types	mlir_argspartitions_op	arg_typesr/   
block_argsr   )r   r   r   warp_specialize   sP   





zGluonSemantic.warp_specialize),__name__
__module____qualname__r   r!   langr   __annotations__r   r#   r   intr2   r   rN   rS   r   rW   r[   rb   rQ   rh   boolrl   ro   rq   ru   ry   r|   r~   r   r   r   r   r   r   r   r   staticmethodr   r   r   r   __classcell__r   r   rU   r   r      sF   
 
"
r   N)typingr   r   r   r   r   triton.language.semanticr    r	   r   _layoutsr
   triton._C.libtriton.gluon_irr   triton.compiler.code_generatorr   r   r   r*   r   r-   r   r   r   r   r   r   <module>   s    