from typing import Optional, Union

import torch
import torch.fx
import torchvision
from torch import nn, Tensor
from torchvision.ops.boxes import box_area

from ..utils import _log_api_usage_once
from .roi_align import roi_align


# Indexed assignment into ``result`` is not supported by ONNX tracing, so while
# tracing the per-level outputs are merged with scatter() instead.
@torch.jit.unused
def _onnx_merge_levels(levels: Tensor, unmerged_results: list[Tensor]) -> Tensor:
    first_result = unmerged_results[0]
    dtype, device = first_result.dtype, first_result.device
    res = torch.zeros(
        (levels.size(0), first_result.size(1), first_result.size(2), first_result.size(3)),
        dtype=dtype,
        device=device,
    )
    for level in range(len(unmerged_results)):
        index = torch.where(levels == level)[0].view(-1, 1, 1, 1)
        index = index.expand(
            index.size(0),
            unmerged_results[level].size(1),
            unmerged_results[level].size(2),
            unmerged_results[level].size(3),
        )
        res = res.scatter(0, index, unmerged_results[level])
    return res


def initLevelMapper(
    k_min: int,
    k_max: int,
    canonical_scale: int = 224,
    canonical_level: int = 4,
    eps: float = 1e-6,
):
    return LevelMapper(k_min, k_max, canonical_scale, canonical_level, eps)


class LevelMapper:
    """Determine which FPN level each RoI in a set of RoIs should map to based
    on the heuristic in the FPN paper.

    Args:
        k_min (int)
        k_max (int)
        canonical_scale (int)
        canonical_level (int)
        eps (float)
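
    Example (illustrative; the ``k_min``/``k_max`` values below are arbitrary)::

        >>> # Eqn.(1) with the default canonical scale 224 and canonical level 4:
        >>> # a 224 x 224 box maps to level 4, a 112 x 112 box one level lower.
        >>> mapper = LevelMapper(k_min=2, k_max=5)
        >>> boxes = [torch.tensor([[0., 0., 224., 224.], [0., 0., 112., 112.]])]
        >>> mapper(boxes)  # level indices are returned relative to k_min
        tensor([2, 1])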
    """

    def __init__(
        self,
        k_min: int,
        k_max: int,
        canonical_scale: int = 224,
        canonical_level: int = 4,
        eps: float = 1e-6,
    ):
        self.k_min = k_min
        self.k_max = k_max
        self.s0 = canonical_scale
        self.lvl0 = canonical_level
        self.eps = eps

    def __call__(self, boxlists: list[Tensor]) -> Tensor:
        """
        Args:
            boxlists (list[BoxList])
        """
        # Compute level ids
        s = torch.sqrt(torch.cat([box_area(boxlist) for boxlist in boxlists]))

        # Eqn.(1) in FPN paper
        target_lvls = torch.floor(self.lvl0 + torch.log2(s / self.s0) + torch.tensor(self.eps, dtype=s.dtype))
        target_lvls = torch.clamp(target_lvls, min=self.k_min, max=self.k_max)
        return (target_lvls.to(torch.int64) - self.k_min).to(torch.int64)


def _convert_to_roi_format(boxes: list[Tensor]) -> Tensor:
    # Concatenate per-image boxes into the (batch_index, x1, y1, x2, y2) rows expected by roi_align.
    concat_boxes = torch.cat(boxes, dim=0)
    device, dtype = concat_boxes.device, concat_boxes.dtype
    ids = torch.cat(
        [torch.full_like(b[:, :1], i, dtype=dtype, layout=torch.strided, device=device) for i, b in enumerate(boxes)],
        dim=0,
    )
    rois = torch.cat([ids, concat_boxes], dim=1)
    return rois


def _infer_scale(feature: Tensor, original_size: list[int]) -> float:
    # assumption: the scale is of the form 2 ** (-k), with k integer
    size = feature.shape[-2:]
    possible_scales: list[float] = []
    for s1, s2 in zip(size, original_size):
        approx_scale = float(s1) / float(s2)
        scale = 2 ** float(torch.tensor(approx_scale).log2().round())
        possible_scales.append(scale)
    return possible_scales[0]


@torch.fx.wrap
def _setup_scales(
    features: list[Tensor], image_shapes: list[tuple[int, int]], canonical_scale: int, canonical_level: int
) -> tuple[list[float], LevelMapper]:
    if not image_shapes:
        raise ValueError("images list should not be empty")
    max_x = 0
    max_y = 0
    for shape in image_shapes:
        max_x = max(shape[0], max_x)
        max_y = max(shape[1], max_y)
    original_input_shape = (max_x, max_y)

    scales = [_infer_scale(feat, original_input_shape) for feat in features]
    # get the levels in the feature map by leveraging the fact that the network always
    # downsamples by a factor of 2 at each level.
    lvl_min = -torch.log2(torch.tensor(scales[0], dtype=torch.float32)).item()
    lvl_max = -torch.log2(torch.tensor(scales[-1], dtype=torch.float32)).item()

    map_levels = initLevelMapper(
        int(lvl_min),
        int(lvl_max),
        canonical_scale=canonical_scale,
        canonical_level=canonical_level,
    )
    return scales, map_levels


@torch.fx.wrap
def _filter_input(x: dict[str, Tensor], featmap_names: list[str]) -> list[Tensor]:
    x_filtered = []
    for k, v in x.items():
        if k in featmap_names:
            x_filtered.append(v)
    return x_filtered


@torch.fx.wrap
def _multiscale_roi_align(
    x_filtered: list[Tensor],
    boxes: list[Tensor],
    output_size: list[int],
    sampling_ratio: int,
    scales: Optional[list[float]],
    mapper: Optional[LevelMapper],
) -> Tensor:
    """
    Args:
        x_filtered (List[Tensor]): List of input tensors.
        boxes (List[Tensor[N, 4]]): boxes to be used to perform the pooling operation, in
            (x1, y1, x2, y2) format and in the image reference size, not the feature map
            reference. The coordinate must satisfy ``0 <= x1 < x2`` and ``0 <= y1 < y2``.
        output_size (Union[List[Tuple[int, int]], List[int]]): size of the output
        sampling_ratio (int): sampling ratio for ROIAlign
        scales (Optional[List[float]]): If None, scales will be automatically inferred. Default value is None.
        mapper (Optional[LevelMapper]): If None, mapper will be automatically inferred. Default value is None.
    Returns:
        result (Tensor)
    """
    if scales is None or mapper is None:
        raise ValueError("scales and mapper should not be None")

    num_levels = len(x_filtered)
    rois = _convert_to_roi_format(boxes)

    if num_levels == 1:
        return roi_align(
            x_filtered[0],
            rois,
            output_size=output_size,
            spatial_scale=scales[0],
            sampling_ratio=sampling_ratio,
        )

    levels = mapper(boxes)

    num_rois = len(rois)
    num_channels = x_filtered[0].shape[1]

    dtype, device = x_filtered[0].dtype, x_filtered[0].device
    result = torch.zeros(
        (num_rois, num_channels) + output_size,
        dtype=dtype,
        device=device,
    )

    tracing_results = []
    for level, (per_level_feature, scale) in enumerate(zip(x_filtered, scales)):
        idx_in_level = torch.where(levels == level)[0]
        rois_per_level = rois[idx_in_level]

        result_idx_in_level = roi_align(
            per_level_feature,
            rois_per_level,
            output_size=output_size,
            spatial_scale=scale,
            sampling_ratio=sampling_ratio,
        )

        if torchvision._is_tracing():
            tracing_results.append(result_idx_in_level.to(dtype))
        else:
            # Under autocast the per-level outputs may come back in different dtypes,
            # so match result's dtype before the in-place indexed copy.
            result[idx_in_level] = result_idx_in_level.to(result.dtype)

    if torchvision._is_tracing():
        result = _onnx_merge_levels(levels, tracing_results)

    return result


class MultiScaleRoIAlign(nn.Module):
    """
    Multi-scale RoIAlign pooling, which is useful for detection with or without FPN.

    It infers the scale of the pooling via the heuristics specified in eq. 1
    of the `Feature Pyramid Network paper <https://arxiv.org/abs/1612.03144>`_.
    The keyword-only parameters ``canonical_scale`` and ``canonical_level``
    correspond respectively to ``224`` and ``k0=4`` in eq. 1, and
    have the following meaning: ``canonical_level`` is the target level of the pyramid from
    which to pool a region of interest with ``w x h = canonical_scale x canonical_scale``.

    Args:
        featmap_names (List[str]): the names of the feature maps that will be used
            for the pooling.
        output_size (List[Tuple[int, int]] or List[int]): output size for the pooled region
        sampling_ratio (int): sampling ratio for ROIAlign
        canonical_scale (int, optional): canonical_scale for LevelMapper
        canonical_level (int, optional): canonical_level for LevelMapper

    Examples::

        >>> m = torchvision.ops.MultiScaleRoIAlign(['feat1', 'feat3'], 3, 2)
        >>> i = OrderedDict()
        >>> i['feat1'] = torch.rand(1, 5, 64, 64)
        >>> i['feat2'] = torch.rand(1, 5, 32, 32)  # this feature won't be used in the pooling
        >>> i['feat3'] = torch.rand(1, 5, 16, 16)
        >>> # create some random bounding boxes
        >>> boxes = torch.rand(6, 4) * 256; boxes[:, 2:] += boxes[:, :2]
        >>> # original image size, before computing the feature maps
        >>> image_sizes = [(512, 512)]
        >>> output = m(i, [boxes], image_sizes)
        >>> print(output.shape)
        >>> torch.Size([6, 5, 3, 3])

    """

    __annotations__ = {"scales": Optional[list[float]], "map_levels": Optional[LevelMapper]}

    def __init__(
        self,
        featmap_names: list[str],
        output_size: Union[int, tuple[int], list[int]],
        sampling_ratio: int,
        *,
        canonical_scale: int = 224,
        canonical_level: int = 4,
    ):
        super().__init__()
        _log_api_usage_once(self)
        if isinstance(output_size, int):
            output_size = (output_size, output_size)
        self.featmap_names = featmap_names
        self.sampling_ratio = sampling_ratio
        self.output_size = tuple(output_size)
        self.scales = None
        self.map_levels = None
        self.canonical_scale = canonical_scale
        self.canonical_level = canonical_level

    def forward(
        self,
        x: dict[str, Tensor],
        boxes: list[Tensor],
        image_shapes: list[tuple[int, int]],
    ) -> Tensor:
        """
        Args:
            x (OrderedDict[Tensor]): feature maps for each level. They are assumed to have
                all the same number of channels, but they can have different sizes.
            boxes (List[Tensor[N, 4]]): boxes to be used to perform the pooling operation, in
                (x1, y1, x2, y2) format and in the image reference size, not the feature map
                reference. The coordinate must satisfy ``0 <= x1 < x2`` and ``0 <= y1 < y2``.
            image_shapes (List[Tuple[height, width]]): the sizes of each image before they
                have been fed to a CNN to obtain feature maps. This allows us to infer the
                scale factor for each one of the levels to be pooled.
        Returns:
            result (Tensor)
        """
        x_filtered = _filter_input(x, self.featmap_names)
        if self.scales is None or self.map_levels is None:
            self.scales, self.map_levels = _setup_scales(
                x_filtered, image_shapes, self.canonical_scale, self.canonical_level
            )

        return _multiscale_roi_align(
            x_filtered,
            boxes,
            self.output_size,
            self.sampling_ratio,
            self.scales,
            self.map_levels,
        )

    def __repr__(self) -> str:
        return (
            f"{self.__class__.__name__}(featmap_names={self.featmap_names}, "
            f"output_size={self.output_size}, sampling_ratio={self.sampling_ratio})"
        )
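

# Minimal usage sketch (illustrative only; the feature-map names, shapes, and boxes below
# are arbitrary examples): pools RoIs from two FPN-style levels of a 512 x 512 image.
if __name__ == "__main__":
    from collections import OrderedDict

    feature_maps = OrderedDict()
    feature_maps["feat1"] = torch.rand(1, 5, 64, 64)  # stride-8 level
    feature_maps["feat2"] = torch.rand(1, 5, 16, 16)  # stride-32 level

    pooler = MultiScaleRoIAlign(["feat1", "feat2"], output_size=7, sampling_ratio=2)

    boxes = torch.rand(6, 4) * 256
    boxes[:, 2:] += boxes[:, :2]  # ensure x1 <= x2 and y1 <= y2

    output = pooler(feature_maps, [boxes], image_shapes=[(512, 512)])
    print(output.shape)  # torch.Size([6, 5, 7, 7])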