o
    ,h                     @   st  d Z ddlZddlZddlZddlZddlZddlmZmZ ddl	m
Z
mZmZ ddlZddlm  mZ ddlmZmZmZmZmZ ddlmZmZ ddlmZmZmZmZ ddl m!Z!m"Z" dd	l#m$Z$m%Z%m&Z& dd
l'm(Z(m)Z) ddl*m+Z+ ddl,m-Z- ddl.m/Z/ ddl0m1Z1 ddl2m3Z3m4Z4m5Z5m6Z6m7Z7m8Z8m9Z9 ddl:m;Z;m<Z< ddl=m>Z>m?Z? ddl@mAZA ddlBmCZC erddlDmEZE dZFe(eGdZHe(eGdZIdd ZJdd ZKdd ZLdd ZMG d d! d!ZNG d"d# d#ZOG d$d% d%ZPeO ZQg d&ZReAeeejSegZTeU aVd'd( ZWG d)d* d*ZXd+aYd+aZd+a[d+a\da]ej^d9d-e_fd.d/Z`ej^d0d1 Zad:d3d4Zbd5d6 Zcd7d8 ZddS );a  
Provides functionality for compiling PyTorch's autograd (automatic differentiation) system.

This module implements compiled autograd, which traces and optimizes backward pass
computations at runtime. The key components are:

- AutogradCompilerInstance: Traces and compiles autograd graphs using FX
- Context managers (_enable/_disable): Control when compiled autograd is active
- Utility functions: Support graph manipulation, tensor operations, and hooks

Compiled autograd can significantly improve backward pass performance by removing
Python overhead and enabling additional optimizations. It works by capturing
backward computations into an FX graph that can be compiled and optimized,
while maintaining the same semantics as eager mode autograd.
    N)Counterdefaultdict)OptionalTYPE_CHECKINGUnion)call_accumulate_gradcall_backward	call_hookFakeCompiledAutogradEngineunwrap_maybe_dynamic_intGetItemSourceLocalSource)countersget_chromium_event_loggerlazy_format_graph_codeset_locals_to_steal)AutogradLazyBackwardCompileInfo%CachedAutogradLazyBackwardCompileInfocompile_contextCompileContext	CompileId)getArtifactLoggertrace_structuredclone_preserve_strides)FakeTensorMode)GraphModule)BackwardState)	decomposedisable_autocast_cachedisable_proxy_modes_tracingfetch_object_proxyProxyTorchDispatchModePythonKeyTracertrack_tensor_tree)
DimDynamicShapeEnv)preserve_node_metaset_stack_trace)
OrderedSet)CapturedTraceback)Proxya  You can turn off compiled autograd by either:
1. Moving the unsupported autograd call outside of the torch.compile'd region.
2. Wrapping the unsupported autograd call in the torch._dynamo.compiled_autograd._disable() context manager.
3. Setting torch._dynamo.config.compiled_autograd=False for the torch.compile call containing the unsupported autograd call.
4. Setting torch._dynamo.config.compiled_autograd=False at the start of the program.compiled_autogradcompiled_autograd_verbosec                   C   s   t jjjdS )Nr/   )torch_logging	_internal	log_stateis_artifact_enabled r5   r5   Z/var/www/html/scripts/venv/lib/python3.10/site-packages/torch/_dynamo/compiled_autograd.py snapshot_verbose_logging_enabledR   s   
r7   c                   C   s   t jjjjS N)r0   	_inductorconfigtriton
cudagraphsr5   r5   r5   r6   snapshot_cudagraph_enabledX      r=   c                 C   s   | d urt | S | S r8   r   )xr5   r5   r6   maybe_clone\   s   r@   c                 C   sb   t | jtr
| jjS t | jtr-tjj  | j	 W  d    S 1 s&w   Y  d S t
d)NzEUnexpected Lazy Backward Compilation Info Type. Please file an issue.)
isinstance_lazy_backward_infor   	bw_moduler   r0   _subclassesfake_tensorunset_fake_temporarilybw_module_fnAssertionError)CompiledFunctionr5   r5   r6   extract_bw_moduleb   s   $rJ   c                   @   sT   e Zd ZdefddZdejjfddZde	ej
 fdd	Zd
e	ej
 fddZdS )
NaNCheckeraccumulate_gradc                 C   s   || _ g | _i | _g | _d S r8   )rL   params_indicesparams_to_checkoutput_names)selfrL   r5   r5   r6   __init__~      
zNaNChecker.__init__graphc                 C   s   t t|j}|jdtd}|jddd jd }| jt|kr&| j| ks(J |D ]'}|jd }|jt	j
krF|jd |u rFt|jd tsHJ | j|jd  q*dd |D | _d S )	Ncall_functionoptargetoutputrV   r      c                 S   s   g | ]}|j qS r5   )name).0noder5   r5   r6   
<listcomp>   s    z.NaNChecker.prep_with_graph.<locals>.<listcomp>)nextiternodes
find_nodesr   argsrL   boolrW   operatorgetitemrA   intrM   appendrO   )rP   rS   inputs_nodeacc_grad_nodesoutput_nodesr]   
param_noder5   r5   r6   prep_with_graph   s"   
zNaNChecker.prep_with_graphinputsc                 C   s`   | j sd S | jD ]%}|| j}|d ur"t| r"J d| d|| | jd| d< qd S )Nz9Compiled autograd running under anomaly mode with inputs[zD] already having NaN gradient. This is not supported. {TURN_OFF_MSG}zinputs[])rL   rM   gradr0   isnananyrN   )rP   rn   idxrp   r5   r5   r6   prep_with_inputs   s   


zNaNChecker.prep_with_inputsoutc                 C   s   | j r6|rJ g }| j D ]\}}|jd usJ t|j r&|| q|r4tdd	| dd S g }t
|D ]\}}t| rO|| j|  q<|r]tdd	| dd S )Nz9Compiled Autograd returned NaN gradients for parameters: ,.z;Compiled Autograd returned NaN gradients for output nodes: )rL   rN   itemsrp   r0   rq   rr   rh   RuntimeErrorjoin	enumeraterO   )rP   ru   
nan_params
inputs_strparam	nan_gradsirp   r5   r5   r6   check   s.   
zNaNChecker.checkN)__name__
__module____qualname__rd   rQ   r0   fxGraphrm   tupleTensorrt   r   r5   r5   r5   r6   rK   }   s
    rK   c                   @   $   e Zd Zdd Zdd Zdd ZdS )OpNamespacec                 C   s   t  | _d S r8   )r   custom_function_name_counterrP   r5   r5   r6   rQ      r>   zOpNamespace.__init__c                    s   |rd| }| j | }| j |  d7  < | | }t| |r!J t||| |r5t| |tj  |S tjj fdd}t| || |S )NCppNoderZ   c                     s    | i |S r8   r5   rc   kwargsresultr5   r6   run_non_traceable_cpp_in_eager      z7OpNamespace.add.<locals>.run_non_traceable_cpp_in_eager)r   hasattrOpsetattrr0   _dynamoallow_in_graphdisable)rP   r[   fnis_custom_functionis_traceablecountr   r5   r   r6   add   s   
	zOpNamespace.addc                 C   s
   t | |S r8   )getattr)rP   r[   r5   r5   r6   get   s   
zOpNamespace.getN)r   r   r   rQ   r   r   r5   r5   r5   r6   r      s    r   c                   @   r   )r   c                 C   s   || _ || _|| _d| _d S )Nz#torch._dynamo.compiled_autograd.ops)r   r   r   r   )rP   r[   r   r   r5   r5   r6   rQ      rR   zOp.__init__c                 O   s   | j |i |S r8   )r   )rP   rc   r   r5   r5   r6   __call__      zOp.__call__c                 C   s   | j d | j S )Nrw   )r   r   r   r5   r5   r6   __repr__   r   zOp.__repr__N)r   r   r   rQ   r   r   r5   r5   r5   r6   r      s    r   )rn   sizesscalarshookspacked_datac                 C   s   t tt| d d dS )N)compiled_autograd_idframe_idframe_compile_idr   )r   r5   r5   r6   make_compile_context  s   r   c                   @   s,  e Zd ZdgddZdd ZedefddZd	ee	j
 d
ee deeeef  deeeeef   dedefddZdee fddZdd Zdede	jjjdee fddZdd Zdd Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( Zd)d* Z d+d, Z!d-d. Z"d/d0 Z#d1efd2d3Z$d	ee	j
 d4ed1efd5d6Z%d7d8 Z&d9d: Z'd;d< Z(dee fd=d>Z)d?d@ Z*dAdB Z+dCdD Z,dEdF Z-dGdH Z.edIdJ Z/edKdL Z0dMdN Z1dOdP Z2dQdR Z3dSdT Z4dUdV Z5dWdX Z6dYdZ Z7d[d\ Z8	dhdeeeeef   fd]d^Z9d_efd`daZ:dbedceddee	jj; fdedfZ<dS )iAutogradCompilerInstancereturnNc                 C   sT   || _ t | _| jj| _t | _tdd| jd| _t	 | _
t| j
d| _d | _d S )NT)allow_fallback_kernelsallow_non_fake_inputs	shape_envsymbolic)compiler_fn
contextlib	ExitStackstackcloser(   r   r   fake_tensor_moder%   	fx_tracerr$   
proxy_modehooks_proxy)rP   r   r5   r5   r6   rQ     s   


z!AutogradCompilerInstance.__init__c                 C   s    t |tjsJ | jj||dS )N)source)rA   r0   r   r   from_tensor)rP   r?   r   r5   r5   r6   	wrap_fake"  s   z"AutogradCompilerInstance.wrap_fakec                 C   s   t t| |S r8   r   )r[   rs   r5   r5   r6   r   &  r   zAutogradCompilerInstance.sourcern   r   r   originsrL   
check_nansc              
      s  t d d  d7  < tt _tt _t j _ j	  |r%t
|nd  _t  _t jd jd jidd tj  j_tjjtd j_i  j_i  _ fdd	tD \} _ _ _ _  j!"t#  |\}}	}
|d
 }zt$|D ]\}} %| &d|||< qyW n t'y } zt(dt)| dt* |d }~ww  +|||  fddt$|D } fddt,t-|D }t$|D ]\}} j.dt/|| fi ||< ||  j|j0< qȈ +|||	}t$|D ]<\}} &d|}t1|tr
 j23||t4j5||< qt1|t6r$ j2j7 j2j8||t4j5d||d||< qt9dt)| +| j|
 t$|D ]\}} j|  j|j0< q7 j!"t:i   j!" j;  j!" j<  j!"t=   j;j2d uslJ  j;j2} j!"tjj>j?@| tAtBC |||fS )Nr.   capturesrZ   graph_idTlog_pt2_compile_event)
tracer_clsc                 3   s"    | ]} j d |di V  qdS )placeholderr5   N)r   create_proxy)r\   r[   r   r5   r6   	<genexpr>J  s
    
z9AutogradCompilerInstance.begin_capture.<locals>.<genexpr>r   rn   zFound tensor of type z,, which is not supported by FakeTensorMode. c              	      s*   g | ]\}} j | d |tjqS )r   )r   $create_unspecified_symint_and_symbolr   r'   DYNAMIC)r\   rs   valr   r5   r6   r^   ^  s    
z:AutogradCompilerInstance.begin_capture.<locals>.<listcomp>c                    s   g | ]} j | qS r5   )sizes_proxyr\   r   r   r5   r6   r^   j      rT   r   )r   dynamic_dim)hintr   zUnexpected scalar type: )Dr   r_   COMPILE_COUNTERidr   rg   aot_id_counterr   r   	__enter__rK   nan_checkertimetime_nsstart_time_nsr   log_event_startr0   nnModuler   rootr   r   r%   rS   tensor_attrssymnode_proxy_lookup_graph_placeholdersr   scalars_proxyr   packed_data_proxyr   enter_contextr)   r{   r   r   	ExceptionNotImplementedErrortypeTURN_OFF_MSGbind_objects_to_proxiesrangelenr   r   r]   rA   r   r   r'   r   floatcreate_symfloatnodecreate_unspecified_symbolrH   r    r   r   r!   experimentalsymbolic_shapes_suppress_guardsstrr   current_compile_id)rP   rn   r   r   r   rL   r   
args_proxyinputs_originssizes_originsscalars_originsr?   rs   eproxiesr   symintr   r   symvalenvr5   r   r6   begin_capture*  s   	









z&AutogradCompilerInstance.begin_capturecompile_reasonsc                    s&    sJ t ddd  fddd d S )Nartifactc                   S   
   dddS )N!compiled_autograd_compile_reasonsjsonr[   encodingr5   r5   r5   r5   r6   <lambda>     z>AutogradCompilerInstance.log_compile_reasons.<locals>.<lambda>c                      s    S r8   r5   r5   r   r5   r6   r    s    metadata_fn
payload_fn)r   )rP   r   r5   r  r6   log_compile_reasons  s   

z,AutogradCompilerInstance.log_compile_reasonsc                    s   fdd  D }j}t||j|j|j ~t r-jD ]	}	|	j	r,t
dq#tjjfdd}
jjd|
||g|R i dd |d urRj|  fdd	}| }fd
d}tjjjj||d}tj|}|S )Nc                       g | ]}  |qS r5   to_proxyr\   r   r   r5   r6   r^     r   zDAutogradCompilerInstance.proxy_call_aot_backward.<locals>.<listcomp>z@torch.compile does not currently support higher order gradients.c                    s"   t jjjj| | g|R  }|S r8   )r0   
_functorch_aot_autogradruntime_wrappers_backward_prologue_functional)ctx_saved_tensorsctx_symints	flat_argsru   )maybe_subclass_metadatametadatar5   r6   call_aot_bwd_prologue  s   
zOAutogradCompilerInstance.proxy_call_aot_backward.<locals>.call_aot_bwd_prologuerT   kindrW   rc   r   c                     s  dd } | j }fddt|td u D } }t|tjks)J fdd|D }||d t|< d urC| d}i d }t j r\ dj  7  j  d7  <  fd	d
}j j	D ]}|j
dkr|| j}	||j|	_|	|< |d7 }qo|j
dkrt|jdksJ fdd|jd D }qo|j
dkr|j}
j|
}tjj|t|
 jd|di }||j|_||< qo|j
dkr|jtjjjjkrtjjjj|_jj |fdd}||j|_||< qo|j
dkr-|j}
j|
}tjj|t|
 jj |fdd}||_||< qotd|d us8J dd fdd|D }|| |S )Nc                 S   s,   d}| j D ]}|jdkr|d7 }q |S |S )Nr   r   rZ   )ra   rV   )rS   num_argsr]   r5   r5   r6   
num_inputs  s   

zkAutogradCompilerInstance.proxy_call_aot_backward.<locals>.copy_paste_aot_backward_graph.<locals>.num_inputsc                       g | ]} | qS r5   r5   r   )pgradsr5   r6   r^     s    zkAutogradCompilerInstance.proxy_call_aot_backward.<locals>.copy_paste_aot_backward_graph.<locals>.<listcomp>c                    r	  r5   r
  r  r   r5   r6   r^     r   r   _rZ   c                    s   d  d|  S )Naotr  r5   )	node_name)deduped_aot_idr5   r6   make_unique     zlAutogradCompilerInstance.proxy_call_aot_backward.<locals>.copy_paste_aot_backward_graph.<locals>.make_uniquer   rX   c                    s2   g | ]}t |tjjrtj|  jn|qS r5   )rA   r0   r   Noder-   r   r\   n)rP   value_remapr5   r6   r^      s    get_attrr5   rT   c                        |  S r8   r5   r%  r&  r5   r6   r  4      ziAutogradCompilerInstance.proxy_call_aot_backward.<locals>.copy_paste_aot_backward_graph.<locals>.<lambda>call_modulec                    r(  r8   r5   r)  r*  r5   r6   r  =  r+  zshouldn't get herec                   S   s<   t   tdddddW  d    S 1 sw   Y  d S )Nr   {   r"   r0   zerosr5   r5   r5   r6   dummyH  s   $zfAutogradCompilerInstance.proxy_call_aot_backward.<locals>.copy_paste_aot_backward_graph.<locals>.dummyc                    s$   g | ]}t |tjjr  n|qS r5   )rA   r0   r   r-   )r\   o)r0  r5   r6   r^   L  s    )rS   r   rg   _get_compiled_autograd_symintsr   symintsrh   r   r   ra   rV   r]   r[   rc   rW   r   get_fresh_qualnamer   r   r   create_noder0   opsatenviewdefaultreshape	node_copyrH   r   )r  r  	pall_argsr3  psymintsargs_idxpoutputsr!  r]   phr[   qualnamer   outputs)aot_idrC   ctxpbackward_stater  rP   )r   r0  r&  r6   copy_paste_aot_backward_graph  s|   













zWAutogradCompilerInstance.proxy_call_aot_backward.<locals>.copy_paste_aot_backward_graphc                    sX   t jj fdd}tj|}jjd|t|i d}	 }
|g|g |S )Nc                     s   j |  dS )N)
is_runtime)creation_fn)unwrapped_argsrG  subclass_metar5   r6   make_subclassU  r   zkAutogradCompilerInstance.proxy_call_aot_backward.<locals>.proxy_subclass_constructor.<locals>.make_subclassrT   r  )r0   r   r   pytreetree_mapr  r   r   r   allocate_dummyr   )rK  rG  rI  rL  punwrapped_argspoutputrX   r   rJ  r6   proxy_subclass_constructorT  s   zTAutogradCompilerInstance.proxy_call_aot_backward.<locals>.proxy_subclass_constructor)make_subclass_override)r2  _forward_clsrJ   r  r  _aot_idr0   is_grad_enabledoutput_inforequires_gradry   r   r   r   r   r   r  r  r  _backward_epilogue_functionalrM  rN  r  )rP   pinputspsaved_tensorssaved_tensorspctxrD  maybe_backward_state_idxr=  rI   output_alias_infor  rF  rB  rR  resultspresultsr5   )rC  rC   rD  r  r  rE  r  rP   r6   proxy_call_aot_backward  sP   


e
z0AutogradCompilerInstance.proxy_call_aot_backwardbackward_idxrD  r^  c              
   C   s  | j d usJ | j | }| |}| |}	t|jdr'| ||	||||}
n| jjdt||	g|R i d}
|
d us<J t > g }t	|D ]&\}}|d u sT|
| d u rZ|
d  qF|\}}}}|
tj||||d qF| ||
 W d    t|S 1 sw   Y  t|S )NrU  rT   r  )sizedtypelayoutdevice)r   r  r   rT  rb  r   r   r   r"   r{   rh   r0   emptyr   r   )rP   rn   output_metadatasr\  rc  rD  r^  r]  rZ  r[  r   grad_insrs   output_metadatarf  rg  re  rd  r5   r5   r6   proxy_call_backwardo  sN   	


	


z,AutogradCompilerInstance.proxy_call_backwardc           	   	   C   sJ   ||  ||  ||  ||  ||  ||  |f}| t|d gd S )N   )r  
proxy_callcopy_slices_prologue)	rP   rn   
base_sizesbase_stridesbase_storage_offset
view_sizesview_stridesview_storage_offsetrc   r5   r5   r6   call_copy_slices_prologue  s   	z2AutogradCompilerInstance.call_copy_slices_prologuec                 C   s    |  t||||fd gt| S r8   )rn  copy_slices_epiloguer   )rP   needs_input_gradr   res
grad_slicer5   r5   r6   call_copy_slices_epilogue  s
   
z2AutogradCompilerInstance.call_copy_slices_epiloguec                 C   s8   t   tddgW  d    S 1 sw   Y  d S )Nr   i[r.  r   r5   r5   r6   rO    s   $z'AutogradCompilerInstance.allocate_dummyc                 C   s   t ||||S )zBinds ops.fn_name = fn)r6  r   )rP   fn_namer   r   r   r5   r5   r6   bind_function  r"  z&AutogradCompilerInstance.bind_functionc                 C   s    t |}| ||g|R |S )z:Proxies a call to ops.fn_name(grads, *args) into the graph)r6  r   rn  )rP   r|  gradsrc   rk  rV   r5   r5   r6   apply_functional  s   
z)AutogradCompilerInstance.apply_functionalc                    sn   t |\}}t fdd|}jjd||i d fdd|D }| fddtt|D  |S )z*Proxies a call to fn(*args) into the graphc                    s
     | S r8   r
  )r   r   r5   r6   r    s   
 z5AutogradCompilerInstance.proxy_call.<locals>.<lambda>rT   r   c                    s   g | ]}   qS r5   )rO  )r\   r  r   r5   r6   r^         z7AutogradCompilerInstance.proxy_call.<locals>.<listcomp>c                    r  r5   r5   r   )	proxy_outr5   r6   r^     r  )rM  tree_flattenrN  r   r   r   r   r   )rP   r   rc   rk  r  r  
proxy_argsr   r5   )r  rP   r6   rn    s   "z#AutogradCompilerInstance.proxy_callc                 C   sX   t d}t| j|g|R }| jjd||i d}t|t|ks$J | || |S )zEProxies a call to ops.validate_outputs(outputs, *args) into the graphvalidate_outputsrT   r   )	r6  r   rM  rN  r  r   r   r   r   )rP   r  rB  rc   rk  rV   r  new_proxy_outputsr5   r5   r6   r    s   
z)AutogradCompilerInstance.validate_outputsc                 C   sJ   |  |}|  |}| jjdtj||fi d}|  }| |g|g |S NrT   r   )r  r   r   r0   r   rO  r   )rP   old_varnew_varold_var_proxynew_var_proxyr  r   r5   r5   r6   
accumulate  s   

z#AutogradCompilerInstance.accumulatec                 C   s*   | j jdt| || ||fi d d S r  )r   r   r   r  )rP   variablerp   has_post_hooksr5   r5   r6   rL     s   
z(AutogradCompilerInstance.accumulate_gradc                    s(    j dt|g fdd|D R |S )NrT   c                    r	  r5   r
  r\   r?   r   r5   r6   r^     r   z<AutogradCompilerInstance.proxy_call_hook.<locals>.<listcomp>)r   r   r	   )rP   hookrc   r   r5   r   r6   proxy_call_hook  s   z(AutogradCompilerInstance.proxy_call_hookc                 C   sN   | j d usJ | j | }| j| }| j||dd}|  }| |g|g |S )Nunpack_hook	hook_type)r   r   r  rO  r   )rP   hook_iddata_idr  dataproxyru   r5   r5   r6   r    s   

z$AutogradCompilerInstance.unpack_hookr   c                 C   s|   | j d usJ | j | }| j||| dd}t  t|| ||< | || g|g W d    |S 1 s7w   Y  |S )Ntensor_pre_hookr  )r   r  r"   r@   r   )rP   rn   r  r   r  r  r5   r5   r6   r    s   

z(AutogradCompilerInstance.tensor_pre_hookr  c              	   C   sx   | j dtjjjj|| || fi }t  t	|| ||< | 
|| g|g W d    |S 1 s5w   Y  |S NrT   )r   r   r0   _Cr   r.   call_cpp_tensor_pre_hooksr  r"   r@   r   )rP   rn   r  r   r  r5   r5   r6   cpp_tensor_pre_hook  s   

z,AutogradCompilerInstance.cpp_tensor_pre_hookc                 C   sn   | j d usJ | j | }| j||dd}t  dd |D }| || W d    |S 1 s0w   Y  |S )Npre_hookr  c                 S      g | ]}t |qS r5   r@   r  r5   r5   r6   r^   2  r  z5AutogradCompilerInstance.pre_hook.<locals>.<listcomp>r   r  r"   r   )rP   rn   r  r  r   r5   r5   r6   r  )  s   

z!AutogradCompilerInstance.pre_hookc                 C   sp   | j d usJ | j | }| j|||dd}t  dd |D }| || W d    |S 1 s1w   Y  |S )N	post_hookr  c                 S   r  r5   r  r  r5   r5   r6   r^   @  r  z6AutogradCompilerInstance.post_hook.<locals>.<listcomp>r  )rP   rB  rn   r  r  r   r5   r5   r6   r  6  s   

z"AutogradCompilerInstance.post_hookc                 C   s|   t |tjsJ | jd usJ | j| }| j||dd}t  t|g}| ||g W d    |S 1 s7w   Y  |S )Npost_acc_grad_hookr  )rA   r0   r   r   r  r"   r@   r   )rP   inputr  r  r  r5   r5   r6   r  D  s   


z+AutogradCompilerInstance.post_acc_grad_hookc                 C   sB  i }d}t |j}|d jdksJ |d }t |j }tt}|| |d ks+J |t| d }|| |d ks=J t|D ]>\}	}
|sS|
jd j	j
dkrSd}qA|
jd j	j
d	k}t|
jd  dk}|r|rt |
j }td
d |D r|
||	< qA|r| D ]}
td|
 |
jd  |
jd< qt | S g S )NFr   rn   rZ   r   cudaTcpuc                 s   sB    | ]}t |jtjjo|jjd v pt |jto|jj V  qdS ))primsr7  N)rA   rW   r0   _ops
OpOverload	namespacer   r   r\   userr5   r5   r6   r   n  s    	

zDAutogradCompilerInstance.move_graph_nodes_to_cuda.<locals>.<genexpr>zMoving node %s from cpu to cuda)listra   rW   userskeysr   r   r{   metarg  r   rd  allvaluesverbose_logdebugr  )rP   rS   to_movehas_cuda_inputsra   rn   inputs_usersfirst_getitem_idxlast_getitem_idxr   r]   is_cpu	is_scalar
node_usersr5   r5   r6   move_graph_nodes_to_cudaW  s:   
	z1AutogradCompilerInstance.move_graph_nodes_to_cudac                 C   s6   t |tjjo|jdko|jtjjjj	tjjj
jfv S r  )rA   r0   r   r#  rV   rW   r6  r7  sym_sizerg   	sym_numelr9  )rP   r]   r5   r5   r6   is_sym_node  s   z$AutogradCompilerInstance.is_sym_nodec                    s   t   t| jjjddD ]\}} |j  q|tt	d ks$J  fdd}t| jjj
}| jj| t| jjj
}td||  d S )Nr   rY   rZ   c                    s(   |  v s| j dkr| jtv rdS |  S )NrT   T)rV   rW   _impure_targets	is_impurer]   unpack_nodesr5   r6   r    s   z/AutogradCompilerInstance.dce.<locals>.is_impurezDCE removed %d nodes)r+   r{   r   rS   rb   updater  r  r   r   ra   eliminate_dead_coder  r  )rP   r   r]   r  beforeafterr5   r  r6   dce  s   zAutogradCompilerInstance.dcec           
      C   s   g }g }t | jjj}t| t|}|jdksJ |j D ]}|jt	j
ks)J |jr2|| q|| qt }|D ]1}t|jtsGJ |jd |ksPJ t|jd tsZJ t|}||jd  |jd |f|_q=|D ]	}	| jj|	 qq|S )Nr   r   rZ   )r`   r   rS   ra   r_   r[   r  r  rW   re   rf   rh   setrA   rc   r   rg   r   r   
erase_node)
rP   
used_sizesunused_sizesit
sizes_nodegetitem_nodeused_sizes_idxusednext_size_idxunusedr5   r5   r6   remove_unused_sizes  s,   z,AutogradCompilerInstance.remove_unused_sizesc                 C   s   t | jj| jj|S r8   )r   r   r   rS   )rP   r   r5   r5   r6   create_graph_module  s   z,AutogradCompilerInstance.create_graph_modulec              	      s  j dtjdi  j  j ddj |fi  g t	 r,
j jj jjD ]}dD ]}||jv r@|j|= q5q1tddd fddd	                 jrzjj j  d
j  t dg td dddd}td| t !d| td fddd fdd}t" j#dt$% djij&dd j'(d d d  |) fS )NrT   r5   rX   )tensor_metaexample_valuer   r   c                   S   r   )N&compiled_autograd_graph_pre_reorderingstringr   r5   r5   r5   r5   r6   r    r  z6AutogradCompilerInstance.end_capture.<locals>.<lambda>c                      s&   t  jj jjd j djddS )NCompiledAutogradPreReorderingFprint_output)r   r   r   rS   r   print_readabler5   r   r5   r6   r    s    r  r  rn   zCompiled autograd graphT)include_deviceinclude_stridecoloredz%scompiled_autograd_graphc                      s    j ddS )NFr  )r  r5   )rS   r5   r6   r    s    )r  c              	      s4  zda jrj| g }t|D ]$\}}|v r6|dkr1|td| tj|d d q|| q D ]}	||	 	 j
dd||	< q9t : tj% | |||||}
jrcj|
 |
W  d    W  d    W da S 1 syw   Y  W d    n1 sw   Y  W da d S W da d S da w )NTr   r  rZ   )non_blockingF)in_compiled_autograd_regionr   rt   r{   rh   r0   rh  r   maybe_mark_dynamic
pin_memoryr  _disabler   r   r   )compiled_fnrn   r   r   r   packed_inputsfiltered_sizesrs   integerr   ru   )runtime_inputs_to_moverP   r  r5   r6   runtime_wrapper  s6   

0z=AutogradCompilerInstance.end_capture.<locals>.runtime_wrapperr.   r   r   )*r   r   r
   _exec_final_callbacks_stubr   r   r5  
create_argr  r=   r  rS   ra   r  r   delay_unpack_hook_nodesreorder_tensor_pre_hook_nodes'reorder_pre_hook_nodes_to_schedule_asapreorder_accumulate_grad_nodes%reorder_pre_hook_nodes_to_mimic_eager reorder_post_acc_grad_hook_nodesreorder_post_hook_nodesr  r   rm   r  r  r   r   r   compiled_autograd_loginfor  r  r   log_event_endr   r   r   r   __exit__r   )rP   rB  r]   fieldlazy_graph_coder  r5   )rS   r  rP   r  r6   end_capture  s~   



z$AutogradCompilerInstance.end_capturec                 C   s   dd | D }|S )Nc                 S   s    g | ]}t |tjju r|qS r5   )r   r0   r   r#  r$  r5   r5   r6   r^   E  s     z:AutogradCompilerInstance.get_all_nodes.<locals>.<listcomp>r5   )rc   ra   r5   r5   r6   get_all_nodesB  s   z&AutogradCompilerInstance.get_all_nodesc                 C   s8   | j dks| j dkr| jtjkr| jd j dkrdS dS )Nr   rT   r   TF)rV   rW   re   rf   rc   r  r5   r5   r6   is_placeholderH  s   

z'AutogradCompilerInstance.is_placeholderc                 C   s   | j jjdtdD ]:}|jd |jd }}d}|jtjkr%|}|jd }t||g}||j	urC| 
|sC|| |durC|| q	dS )a  
        Usage of AOTAutograd causes all the accumulate_grad_ nodes to get pushed to the end of
        the graph.  This differs from eager mode, which schedules them as soon as possible. This
        pass attempts to reorder the graph to mimic eager behavior.
        rT   rU   r   rZ   N)r   rS   rb   r   rc   rW   re   rf   maxprevr  rh   )rP   r]   rl   	grad_noder  argr5   r5   r6   r  R  s   



z6AutogradCompilerInstance.reorder_accumulate_grad_nodesc                 C   sD   | j jjdtdD ]}|jdddkrq	t|j}|| q	dS )zp
        We can delay unpack hooks until they are needed, even later than in the eager autograd engine.
        rT   rU   r  Nr  )	r   rS   rb   r	   r   r   minr  prepend)rP   r]   
first_userr5   r5   r6   r  g  s   

z0AutogradCompilerInstance.delay_unpack_hook_nodesc                 C   sl   | j jjdtdD ]*}|jdddkrq	|jd }|jd }||jur3| |s3|	| |	| q	dS )a  
        Usage of AOTAutograd causes all the tensor_pre_hook nodes to get pushed
        to the end of the graph. This differs from eager mode, which schedules
        them as soon as possible. This pass attempts to reorder the graph to
        mimic eager behavior.
        rT   rU   r  Nr  r   rZ   )
r   rS   rb   r	   r   r   rc   r  r  rh   )rP   r]   r  
input_noder5   r5   r6   r  t  s   




z6AutogradCompilerInstance.reorder_tensor_pre_hook_nodesc                 C   s   | j jjdtdD ]s}|jdddkrq	|jd }| |jd }g }g }|g}|D ]}|jdkrJ|j	t
jkrJ||jd  || || q+t||D ]\}}	|| ||	 qPt|}
|
|jur|| |
s||
| |D ]}|| qtq	dS )a  
        In this function, we schedule the pre hooks as soon as possible. This
        does not match eager behavior (schedule pre hook right before its
        registered node), but it can make acc grad be scheduled properly when
        the pre hooks are registered to them. After reordering acc grad node, we
        will reorder the pre hooks again to mimic eager behavior.
        rT   rU   r  Nr  r   rZ   )r   rS   rb   r	   r   r   rc   r   rV   rW   re   rf   rh   zipremover  r  r  )rP   r]   r  input_nodes	to_remove	to_append
hook_blockr%  abr  r5   r5   r6   r    s4   





z@AutogradCompilerInstance.reorder_pre_hook_nodes_to_schedule_asapc                 C   s   g }| j jjdtdD ]}|jdddkrq|| qt|D ]D}|jd }t	|j
 }t|dkr6q!tdd |D sAJ tt|d j
 }||jure|| || |D ]}|| q]q!dS )	a%  
        Usage of AOTAutograd causes all the pre_hook nodes to get pushed to the
        end of the graph. This differs from eager mode, which schedules them
        right before their registered node execution. This pass attempts to
        reorder the graph to mimic eager behavior.
        rT   rU   r  Nr  r   c                 s   s&    | ]}|j d ko|jtjkV  qdS )rT   N)rV   rW   re   rf   r  r5   r5   r6   r     s
    
zQAutogradCompilerInstance.reorder_pre_hook_nodes_to_mimic_eager.<locals>.<genexpr>)r   rS   rb   r	   r   r   rh   reversedrc   r  r  r  r   r  r_   r`   r  )rP   	pre_hooksr]   hook_getitem_noder  registered_noderf   r5   r5   r6   r    s.   




z>AutogradCompilerInstance.reorder_pre_hook_nodes_to_mimic_eagerc                 C   s   g }| j jjdtdD ]}|jdddkrq|| qt|D ]8}|jd }|jd }d}t	|j
 D ]}|jdkrF|jtkrF|} nq6|dusOJ d|| || q!dS )	a  
        Usage of AOTAutograd causes all the post_acc_grad_hook nodes to get
        pushed to the end of the graph. This differs from eager mode, which
        schedules them as soon as possible. This pass attempts to reorder the
        graph to mimic eager behavior.
        rT   rU   r  Nr  r   rZ   z8post_acc_grad_hook must have corresponding acc grad node)r   rS   rb   r	   r   r   rh   r  rc   r  r  r  rV   rW   r   )rP   post_acc_grad_hooksr]   r  rl   acc_grad_noder%  r5   r5   r6   r    s,   




z9AutogradCompilerInstance.reorder_post_acc_grad_hook_nodesc                    sd  g }| j jjdtdD ]  jdddkrq|  qt|D ]  jd } jd } jd }t	|dkr9q!g }|
t| |D ]}|
 fd	d
t|j D  qDt|}|jdkr|jtkr|jd }d}	t|j D ]}
|
jdkr|
jtkr|
jdddkr|
}	qt|	dur|	| |  q!| jur| |s|| |  q!dS )a  
        Usage of AOTAutograd causes all the post_hook nodes to get pushed to the
        end of the graph. This differs from eager mode, which schedules them as
        soon as possible. This pass attempts to reorder the graph to mimic eager
        behavior.
        rT   rU   r  Nr  r   rZ      c                 3   s:    | ]}|j d kr|jtkr jdddks|V  qdS )rT   r  Nr  )rV   rW   r	   r   r   r  r  r5   r6   r     s    

zCAutogradCompilerInstance.reorder_post_hook_nodes.<locals>.<genexpr>r  )r   rS   rb   r	   r   r   rh   r  rc   r   extendr  r  r  r  rV   rW   r   r  r  )rP   
post_hooksr  rk   r  input_nodes_and_usersr	  r  rl   post_acc_grad_hook_noder%  r5   r  r6   r    sL   












z0AutogradCompilerInstance.reorder_post_hook_nodesc                    s   |d u rd S t |tr fdd|D S t |tr$t fdd|D S t |tjtjfr3 j|j S t |tjs;|S t	 j
|}t |tjjjjsLJ |jS )Nc                    r	  r5   r
  r  r   r5   r6   r^   0  r   z5AutogradCompilerInstance.to_proxy.<locals>.<listcomp>c                 3   s    | ]}  |V  qd S r8   r
  r  r   r5   r6   r   2  s    z4AutogradCompilerInstance.to_proxy.<locals>.<genexpr>)rA   r  r   r0   SymIntSymFloatr   r]   r   r#   r   r   r   proxy_tensor_ProxyTensorr  )rP   tr  r5   r   r6   r  ,  s   

z!AutogradCompilerInstance.to_proxyc                    s   t  tjjrB|r5t|t|ksJ g }tt|D ]}|| \}}| ||d  | |  q| n fddtt|D  t|t ksLJ t| d | j	d  S )Nc                    r  r5   r5   r   r   r5   r6   r^   I  r  zDAutogradCompilerInstance.bind_objects_to_proxies.<locals>.<listcomp>constanttracer)
rA   r0   r   r-   r   r   set_node_originrh   r&   r   )rP   objectsr   r   bound_proxiesr   nodecall_indexr  r5   r"  r6   r   <  s   z0AutogradCompilerInstance.bind_objects_to_proxiesindexc                 C   s4   | j d usJ | j | }t }t||d | jd |S )Nr#  )r   r   r&   r   )rP   r*  r  bw_stater5   r5   r6   bind_backward_stateO  s
   
z,AutogradCompilerInstance.bind_backward_stater  r)  pyobjc           	      C   sp   d}|d ur|j }t|dr|jd u rtd|j}| | d| d}t  d }|d|}t	| d S )N rU  zThis compiled backward function was saved by AOTAutogradCache, which does not support
                    compiled autograd. Please turn off AOTAutogradCache using `TORCHINDUCTOR_AUTOGRAD_CACHE=0`.z (NodeCall )r  z:raw_stack_trace = CapturedTraceback.extract().format()[-1])
rT  r   rB   ry   rU  r,   extractformatreplacer*   )	rP   r  r)  r-  maybe_aot_idforward_clsnew_coderaw_stack_tracenew_stack_tracer5   r5   r6   r&  V  s   

z(AutogradCompilerInstance.set_node_originr   Nr8   )=r   r   r   rQ   r   staticmethodr   r   r  r0   r   rg   r   r   r   r   rd   r   r  rb  autogradfunctionBackwardCFunctionr   rl  rv  r{  rO  r}  r  rn  r  r  rL   r  r  r  r  r  r  r  r  r  r  r  r  r  r   r  r  r  r  r  r  r  r  r  r   r,  Functionr&  r5   r5   r5   r6   r     s    

u
 C
3
1"w

	$#$9

r   FTdynamicc              
   c   sH   |s
t r
d V  d S |rt|tu sJ ddlm} |jjdkr.dazd V  W dad S daw dd l}|j	j
jtt| |\}}t rL|j	j
jt dat}td7 az5|jd d V  W d    n1 siw   Y  W |ssda|j	j
j|| td8 at|ksJ dd S |sda|j	j
j|| td8 at|ksJ dw )Nr   )
eval_frameforce_eagerTFrZ   zINested Compiled Autograd Contexts must return before their parent context)active_disable_ctxr   rd   torch._dynamor?  _stancestance%compiled_autograd_enabled_force_eagertorch._inductor.cudagraph_treesr  r   r.   set_autograd_compiler	functoolspartialr   r7   set_verbose_loggerr  compiled_autograd_enableddepthr:  set_multithreading_enabled)r   r>  ignore_active_disable_ctxr?  r0   prior_compilerprior_dynamicprior_depthr5   r5   r6   _enable}  s\   





rR  c               
   c   sp    t jjjd d\} }datsdazd V  W | rdadat jjj| | d S | r,dadat jjj| | w )NFT)r0   r  r   r.   rG  rK  rA  )rO  rP  r5   r5   r6   r    s,   

r  r   c                   C   sH   da trJ tjjjd d tjjjd  tjjj  t	
 ad S )NF)rK  r  r0   r  r   r.   rG  rJ  clear_cache	itertoolsr   r   r5   r5   r5   r6   reset  s   rU  c                 C   sT   | d }| ||}|d usJ || || }	||||	}
||
|
jtjdgS )Nr   )memory_format)new_empty_stridedcopy_
as_stridedcloner0   contiguous_format)rn   rp  rq  rr  rs  rt  ru  rp   r   offsetrz  r5   r5   r6   ro    s   	
ro  c                 C   sf   d gt |  }tt | D ]#}| | r0|| d u rq|dkr*|||  |||< q|| ||< q|S )Nr   )r   r   rX  )rx  r   ry  rz  grad_inputsr   r5   r5   r6   rw    s   
rw  )TTr8  )e__doc__r   rH  rT  re   r   collectionsr   r   typingr   r   r   r0   torch.utils._pytreeutils_pytreerM  torch._dynamo.external_utilsr   r   r	   r
   r   torch._dynamo.sourcer   r   torch._dynamo.utilsr   r   r   r   /torch._functorch._aot_autograd.runtime_wrappersr   r   torch._guardsr   r   r   torch._loggingr   r   torch._prims_commonr   torch._subclassesr   torch.fxr   %torch.fx.experimental._backward_stater   "torch.fx.experimental.proxy_tensorr    r!   r"   r#   r$   r%   r&   %torch.fx.experimental.symbolic_shapesr'   r(   torch.fx.tracebackr)   r*   torch.utils._ordered_setr+   torch.utils._tracebackr,   torch.fx.proxyr-   r   r   r  r  r7   r=   r@   rJ   rK   r   r   r6  r   r  r  r   r   r   r   rK  rE  r  rA  rL  contextmanagerrd   rR  r  rU  ro  rw  r5   r5   r5   r6   <module>   s   $	

O	        eG

