# Implementations of ONNX operators (RotaryEmbedding-23, Attention-23) as
# native PyTorch functions, registered through torch.library.custom_op.

import math
import typing
from typing import Callable, Optional

import torch
from torch.onnx.ops import _dtype_mappings

_T = typing.TypeVar("_T", bound=Callable)

# Mapping from the registered onnx::* ops to their ATen decompositions.
ONNX_ATEN_DECOMP_TABLE: dict[torch._ops.OpOverload, Callable] = {}

# ONNX TensorProto dtype codes that Attention-23 accepts as the intermediate
# softmax precision: FLOAT, FLOAT16, DOUBLE, BFLOAT16.
_ATTENTION_23_ALLOWED_INTERMEDIATE_PRECISIONS = frozenset({1, 10, 11, 16})
eegef fddZedd	d;dd d ddejdejdejdeej dededed
ejfddZdee ded
efddZdejdeded
ejfd d!Zd"ejd#ejd$ed%edee d&ed
ejfd'd(Zd$ed%ed
dfd)d*Zd"ejd#ejd$ed%edee d
ejfd+d,Zed-d			d<dd d d dd.dd/d"ejd#ejd0ejd1eej d2eej d3eej d4ed5ed6ed&edee d7ed8ee d
eejejejejf fd9d:ZdS )=    N)CallableOptional)_dtype_mappings_T)boundONNX_ATEN_DECOMP_TABLE>      
         op_typeopset_versionreturnc                    s   dt dt f fdd}|S )zDDecorator to register an ONNX operator with a custom implementation.funcr   c                    sP   d }t jjd  d| dd| }| tttt jj |< ||  |S )Nopsetzonnx::. )mutates_args)torchlibrary	custom_opr   getattropsonnxregister_fake)r   overloadtorch_opr   r   r   O/var/www/html/scripts/venv/lib/python3.10/site-packages/torch/onnx/ops/_impl.py	decorator   s   

z_onnx_op.<locals>.decorator)r   )r   r   r   r   r   r   _onnx_op   s   r    RotaryEmbedding   F)interleaved	num_headsrotary_embedding_dimx	cos_cache	sin_cacheposition_idsr#   r$   r%   c                   sN   j d } j d }t j dkr2 j d }	t|dk fdd |	| }
@_onnx_op("RotaryEmbedding", 23)
def rotary_embedding_23(
    x: torch.Tensor,
    cos_cache: torch.Tensor,
    sin_cache: torch.Tensor,
    position_ids: Optional[torch.Tensor] = None,
    *,
    interleaved: bool = False,
    num_heads: int = 0,
    rotary_embedding_dim: int = 0,
) -> torch.Tensor:
    """RotaryEmbedding-23 https://onnx.ai/onnx/operators/onnx__RotaryEmbedding.html#rotaryembedding-23"""
    original_shape = x.shape
    batch_size = x.shape[0]
    sequence_length = x.shape[1]
    if len(x.shape) == 3:
        hidden_size = x.shape[2]
        torch._check(
            num_heads != 0,
            lambda: f"num_heads must be provided for 3D inputs. Received input tensor with shape {x.shape}",
        )
        head_size = hidden_size // num_heads
        new_shape = [batch_size, sequence_length, num_heads, head_size]
        x = torch.reshape(x, new_shape)
    torch._check(len(x.shape) == 4, lambda: "x should be a 4D tensor by now")
    head_size = x.shape[3]

    # If rotary_embedding_dim is not provided, rotate the full head dimension.
    if rotary_embedding_dim == 0:
        rotary_embedding_dim = head_size
    x_rotate = x[:, :, :, :rotary_embedding_dim]
    x_not_rotate = x[:, :, :, rotary_embedding_dim:]
    rotary_embedding_dim_half = rotary_embedding_dim // 2

    # Retrieve the cos/sin caches, optionally gathered by position ids.
    if position_ids is not None:
        cos = cos_cache[position_ids]
        sin = sin_cache[position_ids]
    else:
        cos = cos_cache
        sin = sin_cache
    cos = cos[:, :, :rotary_embedding_dim_half]
    sin = sin[:, :, :rotary_embedding_dim_half]
    cos = torch.unsqueeze(cos, 2)  # [batch, seq, 1, rotary_dim/2]
    sin = torch.unsqueeze(sin, 2)

    # Split the rotated slice into halves, or de-interleave even/odd lanes.
    if interleaved:
        x1 = x_rotate[:, :, :, 0::2]
        x2 = x_rotate[:, :, :, 1::2]
    else:
        x1, x2 = torch.chunk(x_rotate, 2, dim=-1)

    # Apply the rotation as a complex multiply: (x1 + i*x2) * (cos + i*sin).
    real = cos * x1 - sin * x2
    imag = sin * x1 + cos * x2

    # Put the rotated values back into their original layout.
    if interleaved:
        real = torch.unsqueeze(real, -1)
        imag = torch.unsqueeze(imag, -1)
        x_rotate_concat = torch.cat((real, imag), dim=-1)
        x_rotate = torch.reshape(x_rotate_concat, x_rotate.shape)
    else:
        x_rotate = torch.cat((real, imag), dim=-1)
    output = torch.cat((x_rotate, x_not_rotate), dim=-1)
    if len(original_shape) == 3:
        output = torch.reshape(output, original_shape)
    return output

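# An illustrative call of rotary_embedding_23 (all sizes assumed): a 4D input
# laid out as [batch, seq, num_heads, head_size], with cos/sin caches of shape
# [batch, seq, head_size // 2] when position_ids is omitted.
#
#     x = torch.randn(2, 8, 4, 64)
#     cos_cache = torch.randn(2, 8, 32)
#     sin_cache = torch.randn(2, 8, 32)
#     out = rotary_embedding_23(x, cos_cache, sin_cache)
#     assert out.shape == x.shape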

def _get_scale_factor(scale: Optional[float], head_size: int) -> float:
    """Get the scale factor for attention computation."""
    return scale if scale is not None else (1.0 / math.sqrt(head_size))


def _reshape_3d_to_4d(
    tensor: torch.Tensor, batch_size: int, num_heads: int
) -> torch.Tensor:
    """Reshape 3D tensor to 4D for multi-head attention."""
    sequence_length, hidden_size = tensor.shape[1], tensor.shape[2]
    head_size = hidden_size // num_heads
    # [batch, seq, hidden] -> [batch, num_heads, seq, head_size]
    return (
        tensor.view(batch_size, sequence_length, num_heads, head_size)
        .transpose(1, 2)
        .contiguous()
    )


def _get_qk_output_for_aten_spda(
    Q: torch.Tensor,
    K: torch.Tensor,
    current_q_num_heads: int,
    current_kv_num_heads: int,
    scale: Optional[float],
    qk_matmul_output_mode: int,
) -> torch.Tensor:
    """Get QK output tensor based on the specified mode."""
    if qk_matmul_output_mode == 0:
        return _compute_qk_output_for_mode_0(
            Q, K, current_q_num_heads, current_kv_num_heads, scale
        )
    # The other modes are not produced by the SDPA fast path; return a
    # correctly shaped placeholder instead.
    return torch.zeros_like(torch.matmul(Q, K.transpose(-2, -1)))


def _validate_gqa_configuration(
    current_q_num_heads: int, current_kv_num_heads: int
) -> None:
    """Validate Group Query Attention configuration."""
    torch._check(
        current_q_num_heads % current_kv_num_heads == 0,
        lambda: f"q_num_heads ({current_q_num_heads}) must be divisible by "
        f"kv_num_heads ({current_kv_num_heads}) for GQA",
    )


def _compute_qk_output_for_mode_0(
    Q: torch.Tensor,
    K: torch.Tensor,
    current_q_num_heads: int,
    current_kv_num_heads: int,
    scale: Optional[float],
) -> torch.Tensor:
    """Helper function to compute QK output for qk_matmul_output_mode == 0."""
    K_for_qk = K
    if current_q_num_heads != current_kv_num_heads:
        # GQA: repeat each KV head so the head dimensions line up with Q.
        repeat_factor = current_q_num_heads // current_kv_num_heads
        K_for_qk = K.repeat_interleave(repeat_factor, dim=1)
    scale_factor = _get_scale_factor(scale, Q.shape[3])
    # Scale Q and K by sqrt(scale) each so the product carries the full scale.
    sqrt_scale = math.sqrt(scale_factor)
    Q_scaled = Q * sqrt_scale
    K_scaled = K_for_qk * sqrt_scale
    return torch.matmul(Q_scaled, K_scaled.transpose(-2, -1))

@_onnx_op("Attention", 23)
def attention_23(
    Q: torch.Tensor,
    K: torch.Tensor,
    V: torch.Tensor,
    attn_mask: Optional[torch.Tensor] = None,
    past_key: Optional[torch.Tensor] = None,
    past_value: Optional[torch.Tensor] = None,
    *,
    is_causal: bool = False,
    kv_num_heads: int = 0,
    q_num_heads: int = 0,
    qk_matmul_output_mode: int = 0,
    scale: Optional[float] = None,
    softcap: float = 0.0,
    softmax_precision: Optional[int] = None,
) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
    """Attention-23 https://onnx.ai/onnx/operators/onnx__Attention.html#attention-23"""
    num_head_dim, sequence_dim, head_dim = 1, 2, 3

    input_shape_len = len(Q.shape)
    batch_size = Q.shape[0]

    # 3D inputs are [batch, seq, hidden]; bring them to
    # [batch, num_heads, seq, head_size] for the computation.
    if len(Q.shape) == 3:
        torch._check(
            q_num_heads > 0 and kv_num_heads > 0,
            lambda: "q_num_heads and kv_num_heads must be provided for 3D inputs",
        )
        Q = _reshape_3d_to_4d(Q, batch_size, q_num_heads)
        K = _reshape_3d_to_4d(K, batch_size, kv_num_heads)
        V = _reshape_3d_to_4d(V, batch_size, kv_num_heads)
    torch._check(
        len(Q.shape) == 4 and len(K.shape) == 4 and len(V.shape) == 4,
        lambda: "Q, K, and V should be 4D tensors by now",
    )

    q_sequence_length = Q.shape[sequence_dim]
    q_head_size = Q.shape[head_dim]

    # Prepend past key/value caches along the sequence dimension.
    present_key = (
        torch.cat([past_key, K], dim=sequence_dim) if past_key is not None else K.clone()
    )
    present_value = (
        torch.cat([past_value, V], dim=sequence_dim)
        if past_value is not None
        else V.clone()
    )
    K, V = present_key, present_value

    current_q_num_heads = Q.shape[num_head_dim]
    current_kv_num_heads = K.shape[num_head_dim]
    kv_sequence_length = K.shape[sequence_dim]

    # The SDPA fast path only covers the default QK output mode, no softcap,
    # no explicit softmax precision, and an absent or boolean mask.
    can_use_sdpa = (
        qk_matmul_output_mode == 0
        and softcap == 0.0
        and softmax_precision is None
        and (attn_mask is None or attn_mask.dtype == torch.bool)
    )

    _validate_gqa_configuration(current_q_num_heads, current_kv_num_heads)

    if can_use_sdpa:
        sdpa_attn_mask = attn_mask  # None or boolean (True means attend)
        output = torch.nn.functional.scaled_dot_product_attention(
            Q,
            K,
            V,
            attn_mask=sdpa_attn_mask,
            dropout_p=0.0,
            is_causal=is_causal,
            scale=scale,
            enable_gqa=bool(current_q_num_heads != current_kv_num_heads),
        )
        qk_output = _get_qk_output_for_aten_spda(
            Q, K, current_q_num_heads, current_kv_num_heads, scale, qk_matmul_output_mode
        )
    else:
        if current_q_num_heads != current_kv_num_heads:
            # GQA: expand K and V along the head dimension to match Q.
            repeat_factor = current_q_num_heads // current_kv_num_heads
            K = K.repeat_interleave(repeat_factor, dim=num_head_dim)
            V = V.repeat_interleave(repeat_factor, dim=num_head_dim)

        attn_bias = torch.zeros(
            q_sequence_length, kv_sequence_length, dtype=Q.dtype, device=Q.device
        )
        if is_causal:
            torch._check(
                attn_mask is None, lambda: "Cannot use both is_causal and attn_mask"
            )
            causal_mask = torch.tril(
                torch.ones(
                    q_sequence_length,
                    kv_sequence_length,
                    dtype=torch.bool,
                    device=Q.device,
                )
            )
            attn_bias = attn_bias.masked_fill(causal_mask.logical_not(), float("-inf"))
        if attn_mask is not None:
            if attn_mask.dtype == torch.bool:
                attn_bias = attn_bias.masked_fill(
                    attn_mask.logical_not(), float("-inf")
                )
            else:
                attn_bias = attn_bias + attn_mask

        scale_factor = _get_scale_factor(scale, q_head_size)
        # Split the scale between Q and K for numerical stability.
        sqrt_scale = math.sqrt(scale_factor)
        Q_scaled = Q * sqrt_scale
        K_scaled = K * sqrt_scale

        qk_matmul_output = torch.matmul(Q_scaled, K_scaled.transpose(-2, -1))
        qk_output = qk_matmul_output

        qk_with_bias = qk_matmul_output + attn_bias
        if qk_matmul_output_mode == 1:
            qk_output = qk_with_bias

        if softcap > 0.0:
            qk_with_bias = softcap * torch.tanh(qk_with_bias / softcap)
        if qk_matmul_output_mode == 2:
            qk_output = qk_with_bias

        if (
            softmax_precision is not None
            and softmax_precision in _ATTENTION_23_ALLOWED_INTERMEDIATE_PRECISIONS
        ):
            # Run the softmax in the requested ONNX precision, then cast back.
            original_dtype = qk_with_bias.dtype
            qk_with_bias = qk_with_bias.to(
                _dtype_mappings.ONNX_DTYPE_TO_TORCH_DTYPE[softmax_precision]
            )
            qk_softmax = torch.softmax(qk_with_bias, dim=-1)
            qk_softmax = qk_softmax.to(original_dtype)
        else:
            qk_softmax = torch.softmax(qk_with_bias, dim=-1)

        if qk_matmul_output_mode == 3:
            qk_output = qk_softmax

        output = torch.matmul(qk_softmax, V)

    # Restore the 3D layout [batch, seq, hidden] if the inputs were 3D.
    if input_shape_len == 3:
        output = output.transpose(1, 2).reshape(batch_size, q_sequence_length, -1)

    return output, present_key, present_value, qk_output
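
# An illustrative GQA call of attention_23 (all sizes assumed): 8 query heads
# share 2 key/value heads, and a 6-position KV cache is prepended before the
# 8 new positions are attended to.
#
#     Q = torch.randn(2, 8, 8, 32)        # [batch, q_heads, seq, head]
#     K = torch.randn(2, 2, 8, 32)        # [batch, kv_heads, seq, head]
#     V = torch.randn(2, 2, 8, 32)
#     past_k = torch.randn(2, 2, 6, 32)   # [batch, kv_heads, past_seq, head]
#     past_v = torch.randn(2, 2, 6, 32)
#     out, present_k, present_v, qk = attention_23(
#         Q, K, V, past_key=past_k, past_value=past_v
#     )
#     assert out.shape == (2, 8, 8, 32)
#     assert present_k.shape == (2, 2, 14, 32)   # 6 cached + 8 new positions
#     assert qk.shape == (2, 8, 8, 14)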