o
    ,hA                     @   s  d dl mZ d dlmZ d dlmZmZmZmZ d dl	m
Z
 d dlmZ ddlmZ ddlmZ d	d
lmZmZmZ d	dlmZ d	dlmZmZ g dZG dd de
jZG dd de
jZG dd de
jZG dd de
j Z!G dd de
j Z"G dd de
jZ#G dd de
jZ$G dd de
j Z%de&ee!e"f  dee&eeeef   d e'e( d!ed"e
j f d#ee d$e)d%ed&e%fd'd(Z*d)ed*d+d,Z+G d-d. d.eZ,G d/d0 d0eZ-G d1d2 d2eZ.e ed3e,j/fd4dd5d6d#ee, d$e)d%ed&e%fd7d8Z0e ed3e-j/fd4dd5d6d#ee- d$e)d%ed&e%fd9d:Z1e ed3e.j/fd4dd5d6d#ee. d$e)d%ed&e%fd;d<Z2d	d=lm3Z3 e3e,j/j4e-j/j4e.j/j4d>Z5dS )?    )Sequence)partial)AnyCallableOptionalUnionN)Tensor   )VideoClassification)_log_api_usage_once   )register_modelWeightsWeightsEnum)_KINETICS400_CATEGORIES)_ovewrite_named_paramhandle_legacy_interface)VideoResNetR3D_18_WeightsMC3_18_WeightsR2Plus1D_18_Weightsr3d_18mc3_18r2plus1d_18c                       ^   e Zd Z	ddededee dededdf fd	d
Zededeeeef fddZ  Z	S )Conv3DSimpleN   	in_planes
out_planes	midplanesstridepaddingreturnc                    s   t  j||d||dd d S )N)r	   r	   r	   Fin_channelsout_channelskernel_sizer    r!   biassuper__init__selfr   r   r   r    r!   	__class__ Z/var/www/html/scripts/venv/lib/python3.10/site-packages/torchvision/models/video/resnet.pyr*      s   
zConv3DSimple.__init__c                 C   
   | | | fS Nr/   r    r/   r/   r0   get_downsample_stride(      
z"Conv3DSimple.get_downsample_strideNr   r   
__name__
__module____qualname__intr   r*   staticmethodtupler4   __classcell__r/   r/   r-   r0   r      "    &r   c                       sX   e Zd Zddedededededdf fd	d
Zededeeeef fddZ  ZS )Conv2Plus1Dr   r   r   r   r    r!   r"   Nc                    s`   t  tj||dd||fd||fddt|tjddtj||d|ddf|ddfdd d S )	Nr   r	   r	   r   r   Fr&   r    r!   r'   Tinplacer	   r   r   r)   r*   nnConv3dBatchNorm3dReLUr+   r-   r/   r0   r*   .   s   
zConv2Plus1D.__init__c                 C   r1   r2   r/   r3   r/   r/   r0   r4   ?   r5   z!Conv2Plus1D.get_downsample_strider   r   )	r8   r9   r:   r;   r*   r<   r=   r4   r>   r/   r/   r-   r0   r@   -   s    (&r@   c                       r   )Conv3DNoTemporalNr   r   r   r   r    r!   r"   c                    s(   t  j||dd||fd||fdd d S )NrA   r   r   Fr#   r(   r+   r-   r/   r0   r*   E   s   
zConv3DNoTemporal.__init__c                 C   s
   d| | fS Nr   r/   r3   r/   r/   r0   r4   R   r5   z&Conv3DNoTemporal.get_downsample_strider6   r7   r/   r/   r-   r0   rL   D   r?   rL   c                       sb   e Zd ZdZ		ddedededejf dedeej d	df fd
dZ	de
d	e
fddZ  ZS )
BasicBlockr   Ninplanesplanesconv_builder.r    
downsampler"   c                    s   || d d d |d d d|   }t    t|||||t|tjdd| _t||||t|| _tjdd| _|| _	|| _
d S )Nr	   TrC   )r)   r*   rG   
SequentialrI   rJ   conv1conv2relurR   r    r,   rO   rP   rQ   r    rR   r   r-   r/   r0   r*   [   s   (

zBasicBlock.__init__xc                 C   sB   |}|  |}| |}| jd ur| |}||7 }| |}|S r2   )rT   rU   rR   rV   r,   rX   residualoutr/   r/   r0   forwardn   s   




zBasicBlock.forwardr   Nr8   r9   r:   	expansionr;   r   rG   Moduler   r*   r   r\   r>   r/   r/   r-   r0   rN   W   s$    rN   c                       sb   e Zd ZdZ		ddedededejf ded	eej d
df fddZ	de
d
e
fddZ  ZS )
Bottleneck   r   NrO   rP   rQ   .r    rR   r"   c                    s   t    || d d d |d d d|   }ttj||dddt|tjdd| _t|||||t|tjdd| _ttj||| j	 dddt|| j	 | _
tjdd| _|| _|| _d S )Nr	   r   F)r&   r'   TrC   )r)   r*   rG   rS   rH   rI   rJ   rT   rU   r_   conv3rV   rR   r    rW   r-   r/   r0   r*      s   
	("
zBottleneck.__init__rX   c                 C   sL   |}|  |}| |}| |}| jd ur| |}||7 }| |}|S r2   )rT   rU   rc   rR   rV   rY   r/   r/   r0   r\      s   





zBottleneck.forwardr]   r^   r/   r/   r-   r0   ra   |   s$    ra   c                       "   e Zd ZdZd fddZ  ZS )	BasicStemz$The default conv-batchnorm-relu stemr"   Nc              
      s4   t  tjdddddddtdtjdd	 d S )
Nr	   @   )r	      rg   r   r   r   rA   FrB   TrC   rF   r,   r-   r/   r0   r*      s
   
zBasicStem.__init__r"   Nr8   r9   r:   __doc__r*   r>   r/   r/   r-   r0   re          re   c                       rd   )R2Plus1dStemzRR(2+1)D stem is different than the default one as it uses separated 3D convolutionr"   Nc                    sZ   t  tjdddddddtdtjdd	tjdd
dddddtd
tjdd	 d S )Nr	   -   )r   rg   rg   rh   )r   r	   r	   FrB   TrC   rf   rE   r   r   r   )r   r   r   rF   ri   r-   r/   r0   r*      s   

zR2Plus1dStem.__init__rj   rk   r/   r/   r-   r0   rn      rm   rn   c                       s   e Zd Z		ddeeeef  deeeee	e
f   dee dedejf ded	ed
df fddZded
efddZ	ddeeeef  deeee	e
f  dededed
ejfddZ  ZS )r     Fblockconv_makerslayersstem.num_classeszero_init_residualr"   Nc                    s  t    t|  d| _| | _| j||d d|d dd| _| j||d d|d dd| _| j||d d|d dd| _| j||d d	|d dd| _	t
d
| _t
d	|j || _|  D ]N}t|t
jrt
jj|jddd |jdurt
j|jd q`t|t
jrt
j|jd t
j|jd q`t|t
jrt
j|jdd t
j|jd q`|r|  D ]}t|trt
j|jjd qdS dS )a^  Generic resnet video generator.

        Args:
            block (Type[Union[BasicBlock, Bottleneck]]): resnet building block
            conv_makers (List[Type[Union[Conv3DSimple, Conv3DNoTemporal, Conv2Plus1D]]]): generator
                function for each layer
            layers (List[int]): number of blocks per layer
            stem (Callable[..., nn.Module]): module specifying the ResNet stem.
            num_classes (int, optional): Dimension of the final FC layer. Defaults to 400.
            zero_init_residual (bool, optional): Zero init bottleneck residual BN. Defaults to False.
        rf   r   r   r3      r      r	   i   rp   fan_outrV   )modenonlinearityNg{Gz?)r)   r*   r   rO   ru   _make_layerlayer1layer2layer3layer4rG   AdaptiveAvgPool3davgpoolLinearr_   fcmodules
isinstancerH   initkaiming_normal_weightr'   	constant_rI   normal_ra   bn3)r,   rr   rs   rt   ru   rv   rw   mr-   r/   r0   r*      s<   


zVideoResNet.__init__rX   c                 C   sT   |  |}| |}| |}| |}| |}| |}|d}| |}|S rM   )ru   r~   r   r   r   r   flattenr   )r,   rX   r/   r/   r0   r\      s   







zVideoResNet.forwardr   rQ   rP   blocksr    c           
   	   C   s   d }|dks| j ||j kr+||}ttj| j ||j d|ddt||j }g }||| j |||| ||j | _ td|D ]}	||| j || qDtj| S )Nr   F)r&   r    r'   )	rO   r_   r4   rG   rS   rH   rI   appendrange)
r,   rr   rQ   rP   r   r    rR   	ds_stridert   ir/   r/   r0   r}   
  s   

zVideoResNet._make_layer)rq   F)r   )r8   r9   r:   typer   rN   ra   r   r   rL   r@   listr;   r   rG   r`   boolr*   r   r\   rS   r}   r>   r/   r/   r-   r0   r      sB    4r   rr   rs   rt   ru   .weightsprogresskwargsr"   c                 K   sT   |d urt |dt|jd  t| |||fi |}|d ur(||j|dd |S )Nrv   
categoriesT)r   
check_hash)r   lenmetar   load_state_dictget_state_dict)rr   rs   rt   ru   r   r   r   modelr/   r/   r0   _video_resnet$  s   	r   rK   zKhttps://github.com/pytorch/vision/tree/main/references/video_classificationzThe weights reproduce closely the accuracy of the paper. The accuracies are estimated on video-level with parameters `frame_rate=15`, `clips_per_video=5`, and `clip_len=16`.)min_sizer   recipe_docsc                	   @   D   e Zd Zedeedddi eddddd	id
dddZeZdS )r   z7https://download.pytorch.org/models/r3d_18-b3b3357e.pthp   r   rx      	crop_sizeresize_sizeiP5Kinetics-400gO@g-T@zacc@1zacc@5gK7YD@g"_@
num_params_metrics_ops
_file_sizeurl
transformsr   N	r8   r9   r:   r   r   r
   _COMMON_METAKINETICS400_V1DEFAULTr/   r/   r/   r0   r   C  $    r   c                	   @   r   )r   z7https://download.pytorch.org/models/mc3_18-a90a0ba3.pthr   r   r   iPu r   g{GO@gQU@r   gClE@gtVF@r   r   Nr   r/   r/   r/   r0   r   W  r   r   c                	   @   r   )r   z<https://download.pytorch.org/models/r2plus1d_18-91a641e6.pthr   r   r   ir   gʡP@g33333U@r   gOnBD@g1Z^@r   r   Nr   r/   r/   r/   r0   r   k  r   r   
pretrained)r   T)r   r   c                 K   .   t | } tttgd g dt| |fi |S )a  Construct 18 layer Resnet3D model.

    .. betastatus:: video module

    Reference: `A Closer Look at Spatiotemporal Convolutions for Action Recognition <https://arxiv.org/abs/1711.11248>`__.

    Args:
        weights (:class:`~torchvision.models.video.R3D_18_Weights`, optional): The
            pretrained weights to use. See
            :class:`~torchvision.models.video.R3D_18_Weights`
            below for more details, and possible values. By default, no
            pre-trained weights are used.
        progress (bool): If True, displays a progress bar of the download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.video.resnet.VideoResNet`` base class.
            Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/video/resnet.py>`_
            for more details about this class.

    .. autoclass:: torchvision.models.video.R3D_18_Weights
        :members:
    rb   r   r   r   r   )r   verifyr   rN   r   re   r   r   r   r/   r/   r0   r        
r   c                 K   s4   t | } tttgtgd  g dt| |fi |S )a  Construct 18 layer Mixed Convolution network as in

    .. betastatus:: video module

    Reference: `A Closer Look at Spatiotemporal Convolutions for Action Recognition <https://arxiv.org/abs/1711.11248>`__.

    Args:
        weights (:class:`~torchvision.models.video.MC3_18_Weights`, optional): The
            pretrained weights to use. See
            :class:`~torchvision.models.video.MC3_18_Weights`
            below for more details, and possible values. By default, no
            pre-trained weights are used.
        progress (bool): If True, displays a progress bar of the download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.video.resnet.VideoResNet`` base class.
            Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/video/resnet.py>`_
            for more details about this class.

    .. autoclass:: torchvision.models.video.MC3_18_Weights
        :members:
    r	   r   )r   r   r   rN   r   rL   re   r   r/   r/   r0   r     s   
r   c                 K   r   )a  Construct 18 layer deep R(2+1)D network as in

    .. betastatus:: video module

    Reference: `A Closer Look at Spatiotemporal Convolutions for Action Recognition <https://arxiv.org/abs/1711.11248>`__.

    Args:
        weights (:class:`~torchvision.models.video.R2Plus1D_18_Weights`, optional): The
            pretrained weights to use. See
            :class:`~torchvision.models.video.R2Plus1D_18_Weights`
            below for more details, and possible values. By default, no
            pre-trained weights are used.
        progress (bool): If True, displays a progress bar of the download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.video.resnet.VideoResNet`` base class.
            Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/video/resnet.py>`_
            for more details about this class.

    .. autoclass:: torchvision.models.video.R2Plus1D_18_Weights
        :members:
    rb   r   )r   r   r   rN   r@   rn   r   r/   r/   r0   r     r   r   )
_ModelURLs)r   r   r   )6collections.abcr   	functoolsr   typingr   r   r   r   torch.nnrG   torchr   transforms._presetsr
   utilsr   _apir   r   r   _metar   _utilsr   r   __all__rH   r   rS   r@   rL   r`   rN   ra   re   rn   r   r   r   r;   r   r   r   r   r   r   r   r   r   r   r   r   
model_urlsr/   r/   r/   r0   <module>   sx    %1^
*#*#*$