o
    ,h                 	   @   s
  U d dl Z d dlZd dlmZmZmZmZmZmZ d dl	Z	d dl
Z	d dlmZ d dlmZmZmZ e jdddkZdZdee fd	d
Zdee fddZdee fddZdee fddZdee fddZdefddZdefddZe jdddkZdZdZdZ dZ!e"e#d< eddddZ$ee#d< e Z%ee e#d< e Z&ee e#d < ed!d"ddZ'ee#d#< dZ(ee#d$< e Z)ee e#d%< e Z*ee e#d&< ed'd(ddZ+ee#d)< dZ,ee#d*< i Z-e.e/e/f e#d+< dZ0ee" e#d,< d-Z1ed. e#d/< e jd0d1dkZ2ee#d2< e jd3d1dkZ3ee#d4< e  Z4ee#d5< e jd6ddkZ5dZ6dZ7e jd7ddkZ8e jd8dkZ9e jd9ddkZ:e jd:e rJd1nddkZ;dZ<dZ=dZ>e jd;d1dkZ?e jd<dkZ@dZAe jd=d>ZBed? e#d@< dZCdZDe ZEdZFdZGdZHdZIe	jJjKjLe#dA< dZMe	jJjKjLe#dB< dZNeee	jOjPgdf  e#dC< dZQeee	jOjPgdf  e#dD< dZReee	jOjSjPgdf  e#dE< dZTeeeUdF geUdF f  e#dG< dZVeeeUdF geUdF f  e#dH< dZWdZXdZYdZZdZ[i Z\e.e/e.e/ef f e#dI< i Z]e.e/e.e/ef f e#dJ< dZ^e jdKddkZ_dZ`dZaddLdMddNZbe.e/ef e#dO< dPZcedQ e#dR< dZdg dSZeeUee/eeUdF geUdF f f  e#dT< dZfee" e#dU< dZgdVZhdWZidXZjeddYdZd[Zkee#d\< e jd]dkZle jd^dkZme jd_dkZne jd`dkZodaZpee" e#db< dZqe rdne jdcdkZre jdddes Zte jdfdgs Zue jdhdis Zvedj e#dk< e jdldis Zwedj e#dm< dZxdnZye jdodkZze jdpdkZ{e jdqdkZ|drZ}dsZ~dsZe jdtdkZe jdudkZe jdvdkZe"e jdwdZe jdxdyZe jdzd{Zd|e/defd}d~Zd|e/defddZd|e/defddZe jddiZe	jjsFdnd1Ze jdedkZe jdd1dkZe jdddkZe jddkZdZdZdZdZdZe jddkZdZe jddkZee#d< e jddkZee#d< e jddyZe jdd1dkZee#d< daZe jdddkZdMZdZdZdZdZdZdZdZdZe"e jdd Ze jdd1dkZdZdZdZdZdZdZdMZdMZdZdZdZe jdd1dkZde	jv pde	jv Ze p	eZe jdddkZdZee/ e#d< dZee/ e#d< de/fddZe Ze/e#d< eddddZee#d< dZdXZddgZeUeed e/f  e#d< dZee#d< G dd dZdefddZde"fddZe rqdneă Zee" e#d< e Zee#d< eddddZee#d< e jdd1dkZee#d< ee/ e#d< e rz d dlmZ eːreʠe j͠eˠde jСdZneʠdZW n eefy   dZY nw dZdaZe jdddkZe jdddkZdZdZdZdZdZdZdZee#d< e jdd1dkZdZdZdZdZe jddyZedykZedkrdyneZe jddZee/ e#d< e jd¡dkZdZe jdd1dkZee#d< dZee#d< dZee#d< dZee#d< dZee#d< dZee#d< dZee#d< e jdd1dkZe jdd1dkZee#d< dZee#d< G ddЄ dЃZG dd҄ d҃ZG ddԄ dԃZG ddք dփZG dd؄ d؃ZdZed e#d< dZed e#d< G ddބ dރZG dd dZg dZeUe/ e#d< g dZeUe/ e#d< g ZeUee	je	je	jgdf  e#d< G dd dZ erd dlT eeje  dS )    N)AnyCallableLiteralOptionalTYPE_CHECKINGUnion)	is_fbcode)Configget_tristate_envinstall_config_moduleTORCHINDUCTOR_INPLACE_PADDING1Freturnc                   C      t dS )N#TORCHINDUCTOR_FX_GRAPH_REMOTE_CACHEr
    r   r   Q/var/www/html/scripts/venv/lib/python3.10/site-packages/torch/_inductor/config.pyfx_graph_remote_cache_default      r   c                   C   s,   t jddkr
dS t jddkrdS d S )NTORCHINDUCTOR_VEC_ISA_OKr   T0F)osenvirongetr   r   r   r   vec_isa_ok_default   s
   r   c                   C   r   )N#TORCHINDUCTOR_AUTOTUNE_REMOTE_CACHEr   r   r   r   r   autotune_remote_cache_default   r   r   c                   C   r   )N+TORCHINDUCTOR_BUNDLED_AUTOTUNE_REMOTE_CACHEr   r   r   r   r   %bundled_autotune_remote_cache_default   r   r   c                   C   s   t dt sdS d S )N/TORCHINDUCTOR_BUNDLE_TRITON_INTO_FX_GRAPH_CACHET)r
   r   r   r   r   r   )bundle_triton_into_fx_graph_cache_default#   s   r!   c                  C   s<   d} dt jv rt jddkS t rtjd}|| kS dS )N   &TORCHINDUCTOR_USE_STATIC_CUDA_LAUNCHERr   z-pytorch/inductor:static_cuda_launcher_versionTr   r   r   r   torch_utils_internaljustknobs_getval_int)STATIC_CUDA_LAUNCHER_VERSIONversionr   r   r   static_cuda_launcher_default*   s   
r*   c                  C   s@   d} dt jv rt jddkS t rd}tj|}|| kS dS )Nr   TORCHINDUCTOR_PROLOGUE_FUSIONr   z(pytorch/inductor:prologue_fusion_versionTr$   )ENABLE_PROLOGUE_FUSION_VERSIONjk_namer)   r   r   r   prologue_fusion_enabled9   s   
r.   "TORCHDYNAMO_AUTO_FUNCTIONALIZED_V2Ti  precompilation_timeout_secondsz0pytorch/remote_cache:enable_local_fx_graph_cacheTORCHINDUCTOR_FX_GRAPH_CACHE)justknobenv_name_forcedefaultfx_graph_cachefx_graph_remote_cache!bundle_triton_into_fx_graph_cachez>pytorch/remote_cache:enable_non_blocking_remote_cache_write_v2-TORCHINDUCTOR_NON_BLOCKING_REMOTE_CACHE_WRITEnon_blocking_remote_cache_writeautotune_local_cacheautotune_remote_cachebundled_autotune_remote_cachez)pytorch/remote_cache:force_disable_caches"TORCHINDUCTOR_FORCE_DISABLE_CACHESforce_disable_caches&unsafe_skip_cache_dynamic_shape_guards!unsafe_marked_cacheable_functionssleep_sec_TESTING_ONLYneeds_fixed_stride_order)rB   flexible_layout'triton_kernel_default_layout_constraintTORCHINDUCTOR_CPP_WRAPPERr   cpp_wrapper(TORCHINDUCTOR_CPP_WRAPPER_BUILD_SEPARATEcpp_wrapper_build_separatecpp_cache_precompile_headersTORCHINDUCTOR_ONLINE_SOFTMAXTORCHINDUCTOR_SIZE_ASSERTSTORCHINDUCTOR_NAN_ASSERTSTORCHINDUCTOR_SCALAR_ASSERTSTORCHINDUCTOR_ALIGNMENT_ASSERTSTORCHINDUCTOR_MEMORY_PLANNINGTORCHINDUCTOR_USE_FAST_MATHTORCHINDUCTOR_MEMORY_POOLintermediates)nonerR   outputscombinedmemory_poolpost_grad_custom_pre_passpost_grad_custom_post_passjoint_custom_pre_passjoint_custom_post_passpre_grad_custom_passz+torch._inductor.scheduler.BaseSchedulerNode_pre_fusion_custom_pass_post_fusion_custom_passpre_grad_fusion_optionspost_grad_fusion_options"TORCHINDUCTOR_DYNAMIC_SCALE_RBLOCKg-C6?r"   )pre_grad	precisionnum_iterationsrequires_optimizerfx_passes_numeric_check	heuristic)r4   tritonatenrf   mixed_mm_choice)reorder_compute_for_overlap
sink_waitsraise_comms'reorder_for_compute_comm_overlap_passesreorder_prefetch_limitr4   i,     *TORCHINDUCTOR_USE_EXPERIMENTAL_BENCHMARKERz-pytorch/inductor:use_experimental_benchmarker)r4   r3   r2   use_experimental_benchmarkerTORCHINDUCTOR_MAX_AUTOTUNE$TORCHINDUCTOR_MAX_AUTOTUNE_POINTWISETORCHINDUCTOR_MAX_AUTOTUNE_GEMM!TORCHINDUCTOR_DISABLE_DECOMPOSE_K
   autotune_num_choices_displayed"TORCHINDUCTOR_FORCE_SAME_PRECISION(TORCHINDUCTOR_MAX_AUTOTUNE_GEMM_BACKENDSzATEN,TRITON,CPP(TORCHINDUCTOR_MAX_AUTOTUNE_CONV_BACKENDSzATEN,TRITON,TORCHINDUCTOR_MAX_AUTOTUNE_GEMM_SEARCH_SPACEDEFAULT)r|   
EXHAUSTIVEmax_autotune_gemm_search_space,TORCHINDUCTOR_MAX_AUTOTUNE_FLEX_SEARCH_SPACEmax_autotune_flex_search_spacei    #TORCHINDUCTOR_SEARCH_AUTOTUNE_CACHETORCHINDUCTOR_SAVE_ARGS!TORCHINDUCTOR_AUTOTUNE_IN_SUBPROCg      N@g        #TORCHINDUCTOR_AUTOTUNE_MULTI_DEVICE'TORCHINDUCTOR_COORDINATE_DESCENT_TUNING5TORCHINDUCTOR_COORDINATE_DESCENT_CHECK_ALL_DIRECTIONS'TORCHINDUCTOR_COORDINATE_DESCENT_RADIUS#TORCHINDUCTOR_AUTOHEURISTIC_COLLECT TORCHINDUCTOR_AUTOHEURISTIC_USEmixed_mmnamec                 C   s   t | pt| S )N)collect_autoheuristicuse_autoheuristicr   r   r   r   run_autoheuristic  s   r   c                 C      | t jjjdv S N,)r%   	_inductorconfigautoheuristic_collectsplitr   r   r   r   r        r   c                 C   r   r   )r%   r   r   autoheuristic_user   r   r   r   r   r   
  r   r   $TORCHINDUCTOR_AUTOHEURISTIC_LOG_PATH!TORCHINDUCTOR_LAYOUT_OPTIMIZATIONTORCHINDUCTOR_FORCE_LAYOUT_OPT TORCHINDUCTOR_KEEP_OUTPUT_STRIDETORCHINDUCTOR_WARN_MIX_LAYOUT         .TORCHINDUCTOR_ASSUME_UNALIGNED_FALLBACK_OUTPUTTORCHINDUCTOR_DEBUG_FUSIONdebug_fusionTORCHINDUCTOR_BENCHMARK_FUSIONbenchmark_fusion#TORCHINDUCTOR_ENABLED_METRIC_TABLES(TORCHINDUCTOR_LOOP_ORDERING_AFTER_FUSIONloop_ordering_after_fusion'TORCHINDUCTOR_BENCHMARK_EPILOGUE_FUSION@   TORCHINDUCTOR_MIN_NUM_SPLITTORCHINDUCTOR_BENCHMARK_KERNEL%TORCHINDUCTOR_EMULATE_PRECISION_CASTSdevgit0TORCHINDUCTOR_OPTIMIZE_SCATTER_UPON_CONST_TENSORadd_pre_grad_passesremove_pre_grad_passesc                  C   s4   dt jv rt jd } nd} | dv sJ d|  | S )NTORCHINDUCTOR_WORKER_START
subprocess)r   forkspawnzInvalid start method: )r   r   )start_methodr   r   r   decide_worker_start_method  s   

r   worker_start_methodz(pytorch/compiler:worker_suppress_logging%TORCHINDUCTOR_WORKER_SUPPRESS_LOGGINGworker_suppress_loggingfuse_ddp_with_concat_opschedule_comm_wait).N_fuse_ddp_communication_passes_micro_pipeline_tpc                   @   s&   e Zd ZU dZeed< dZeed< dS )_collectiveFauto_selecti   #one_shot_all_reduce_threshold_bytesN)__name__
__module____qualname__r   bool__annotations__r   intr   r   r   r   r     s   
 r   c                  C   s   d} d}t j|}| |kS )a   
    TODO: Remove when parallel compiled is fully enabled internally. For rollout, use a
    knob to enable / disable. The justknob should not be performed at import, however.
    So for fbcode, we assign compile_threads to 'None' below and initialize lazily in
    async_compile.py.
    r"   z0pytorch/inductor:enable_parallel_compile_version)r%   r&   r'   )ENABLE_PARALLEL_COMPILE_VERSIONr-   r)   r   r   r   #parallel_compile_enabled_internally  s   r   c                  C   s   ddl } | t}dtjv rttjd }|d| |S tjdkr+d}|d |S t	 r:t
 s:d}|d |S ttd	rFttdnt }|sNJ td
|}|d| |S )a!  
    Here are the precedence to decide compile_threads
    1. User can override it by TORCHINDUCTOR_COMPILE_THREADS.  One may want to disable async compiling by
       setting this to 1 to make pdb happy.
    2. Set to 1 if it's win32 platform
    3. decide by the number of CPU cores
    r   NTORCHINDUCTOR_COMPILE_THREADSz!compile_threads set to %d via envwin32r"   z"compile_threads set to 1 for win32z"compile_threads set to 1 in fbcodesched_getaffinity    zcompile_threads set to %d)logging	getLoggerr   r   r   r   infosysplatformr   r   hasattrlenr   	cpu_countmin)r   logcompile_threadsr   r   r   r   decide_compile_threads   s,   





r   r   use_static_cuda_launcherz:pytorch/inductor:static_launch_user_defined_triton_kernels7TORCHINDUCTOR_STATIC_LAUNCH_USER_DEFINED_TRITON_KERNELS)static_launch_user_defined_triton_kernels)TORCHINDUCTOR_STRICT_STATIC_CUDA_LAUNCHERstrict_static_cuda_launcherglobal_cache_dir)parutil.zfb/cacheTORCHINDUCTOR_SHAPE_PADDING#TORCHINDUCTOR_COMPREHENSIVE_PADDING   i   force_shape_padTORCHINDUCTOR_PERMUTE_FUSIONTORCHINDUCTOR_PROFILETORCHINDUCTOR_PROFILE_OUTPUTprofile_bandwidth_output3TORCHINDUCTOR_PROFILE_WITH_DO_BENCH_USING_PROFILINGTORCHINDUCTOR_FREEZINGfreezingfreezing_discard_parametersdecompose_mem_bound_mmassume_aligned_inputs.unsafe_ignore_unsupported_triton_autotune_args"check_stack_no_cycles_TESTING_ONLY*always_complex_memory_overlap_TESTING_ONLY*TORCHINDUCTOR_ENABLE_LINEAR_BINARY_FOLDINGTORCHINDUCTOR_ANNOTATE_TRAININGannotate_training)enable_caching_generated_triton_templatesc                   @   s  e Zd ZU dZejdddkZejdddkZdZ	e
e ed< eejdd	Zdejd
ejdkr6dndfZeed ef ed< ejdddkZejdddkZdZe
e ed< dZe
e ed< edZe
e ed< dZed ed< eejddZejdddkZejdddkZejddZejdddkZ dZ!eejd dZ"ejd!dZ#ejd"dZ$d#Z%dZ&ejd$ddkZ'dZ(dS )%cpp$TORCHINDUCTOR_CPP_NO_REDUNDANT_LOOPSr   !TORCHINDUCTOR_CPP_DYNAMIC_THREADSr   Nsimdlen TORCHINDUCTOR_CPP_MIN_CHUNK_SIZE4096CXXdarwinzclang++zg++cxx'TORCHINDUCTOR_CPP_ENABLE_KERNEL_PROFILE TORCHINDUCTOR_CPP_WEIGHT_PREPACKinject_relu_bug_TESTING_ONLYinject_log1p_bug_TESTING_ONLYr   
vec_isa_okoriginal_atenr%   r  inductor_nodedescriptive_names,TORCHINDUCTOR_CPP_MAX_HORIZONTAL_FUSION_SIZE16-TORCHINDUCTOR_CPP_FALLBACK_SCATTER_REDUCE_SUM-TORCHINDUCTOR_CPP_ENABLE_UNSAFE_MATH_OPT_FLAG5TORCHINDUCTOR_CPP_ENABLE_FLOATING_POINT_CONTRACT_FLAGoff)TORCHINDUCTOR_CPP_ENABLE_TILING_HEURISTICF#TORCHINDUCTOR_CPP_GEMM_MAX_K_SLICES%TORCHINDUCTOR_CPP_GEMM_CACHE_BLOCKING%TORCHINDUCTOR_CPP_GEMM_THREAD_FACTORST$TORCHINDUCTOR_CPP_USE_DECOMPOSE_TANH))r   r   r   threadsr   r   r   no_redundant_loopsdynamic_threadsr   r   r   r   min_chunk_sizer   r   r  tupler   strenable_kernel_profileweight_prepackr  r  r
   r  r   r  max_horizontal_fusion_sizefallback_scatter_reduce_sumenable_unsafe_math_opt_flag#enable_floating_point_contract_flagenable_tiling_heuristicsenable_grouped_gemm_templategemm_max_k_slicesgemm_cache_blockinggemm_thread_factorsenable_loop_tail_vecenable_concat_linearuse_decompose_tanhuse_small_dequant_bufferr   r   r   r   r     sN   
 r   c                   @   s  e Zd ZU ejddkZdZdZdZ	e
eeeeedf f   ed< dZdZe r,dndZdZd	Ze
e ed
< dZdZdZdZdZdZdZejde sOdnddkZeed< dZe
e ed< dZeed< dZ dZ!dZ"e
e ed< dZ#eed< dZ$eed< dZ%dZ&ejdddkZ'ejdddkZ(dZ)e*d ed< ejdddkZ+ejdddkZ,dZ-eejddZ.e*d ed< ejdddkZ/dZ0dZ1dZ2eed < dZ3dZ4e
e5 ed!< dZ6ejd"ddkZ7ejd#ddkZ8dZ9dS )$rg   TORCHINDUCTOR_CUDAGRAPHSr   TFN.cudagraph_capture_sizesr   2   "cudagraph_dynamic_shape_warn_limit&TORCHINDUCTOR_COALESCE_TILING_ANALYSISr   coalesce_tiling_analysis	max_tilesprefer_nd_tilingautotune_at_compile_timeautotune_with_sample_inputstile_reductions!TORCHINDUCTOR_UNIQUE_KERNEL_NAMES&TORCHINDUCTOR_UNIQUE_USER_KERNEL_NAMESr  r	  r  #TORCHINDUCTOR_PERSISTENT_REDUCTIONS$TORCHINDUCTOR_COOPERATIVE_REDUCTIONSTORCHINDUCTOR_MULTI_KERNEL)r   r"         multi_kernelTORCHINDUCTOR_DIVISIBLE_BY_16      spill_thresholdr  ENABLE_PERSISTENT_TMA_MATMULTORCHINDUCTOR_SKIP_L1):r   r   r   r   r   r   
cudagraphscudagraph_treescudagraph_skip_dynamic_graphsr-  r   r  r   r   r   slow_path_cudagraph_asserts!cudagraph_trees_history_recordingr    cudagraph_support_input_mutation#cudagraph_unexpected_rerecord_limitr/  force_cudagraph_syncforce_cudagraphs_warmupfast_path_cudagraph_assertsskip_cudagraph_warmupdebug_sync_graphdebug_sync_kerneldense_indexingr1  r   r2  r3  autotune_pointwiseautotune_cublasLtr4  r5  r6   tiling_prevents_pointwise_fusion tiling_prevents_reduction_fusionunique_kernel_namesunique_user_kernel_namesr  r   persistent_reductionscooperative_reductionsforce_cooperative_reductionsr>  divisible_by_16min_split_scan_rblockstore_cubinrB  use_block_ptrr  r  codegen_upcast_to_fp32enable_persistent_tma_matmulskip_l1_cache.disallow_failing_autotune_kernels_TESTING_ONLYr   r   r   r   rg   F  sr   
 $

	rg   c                   @   s  e Zd ZU dZdZejdddkZejddZ	ejddZ
ed	 ed
< ejddZdZdZdZeed< dZeed< dZeed< dZeed< i Zeeef ed< ejdddkZeed< ejdddkZeed< eejddZeed< i Zeeef ed< dZeed< dZeed< dZ eed< dZ!eed< e"  Z#eed < dZ$eed!< dZ%eed"< dZ&e'e ed#< i Z(ee)j*j+e,e f ed$< dZ-e'e,e  ed%< dS )&aot_inductorz9
    Settings for Ahead-Of-Time Inductor Compilation
    r   AOT_INDUCTOR_DEBUG_COMPILEr   r   &AOT_INDUCTOR_COMPILE_WRAPPER_OPT_LEVELO1-AOT_INDUCTOR_DEBUG_INTERMEDIATE_VALUE_PRINTER)r   r   23 debug_intermediate_value_printer&AOT_INDUCTOR_FILTERED_KERNELS_TO_PRINTNFuse_runtime_constant_foldingforce_mmap_weightspackagepackage_cpp_onlymetadata/AOTINDUCTOR_RAISE_ERROR_ON_IGNORED_OPTIMIZATION#raise_error_on_ignored_optimizationDUMP_AOTI_MINIFIERdump_aoti_minifierAOTINDUCTOR_REPRO_LEVELr<  repro_levelpresetsallow_stack_allocationuse_minimal_arrayref_interfaceTpackage_constants_in_sopackage_constants_on_diskprecompile_headersembed_kernel_binaryemit_multi_arch_kernelmodel_name_for_generated_filescustom_ops_to_c_shimscustom_op_libs).r   r   r   __doc__output_pathr   r   r   debug_compilecompile_wrapper_opt_levelrk  r   r   filtered_kernel_namesserialized_in_specserialized_out_specrm  r   rn  ro  rp  rq  dictr  rs  ru  r   rw  rx  r   ry  rz  r{  r|  r   r}  r~  r  r  r   r  r%   _ops
OpOverloadlistr  r   r   r   r   rd    sF   
 
	
rd  c                
   @   s  e Zd ZU dZdZee ed< dZee ed< dZ	e
d ed< dZdZdZdZejd	ejejejejd
ZdZee ed< g dZee ed< ejdddkZdZdZee ed< dZ eed< ejdddkZ!e"ed< ejdZ#ee ed< ejdZ$ee ed< ejddZ%eed< ejdZ&ee ed< ejdddkZ'e"ed< ejd ddkZ(e"ed!< ejd"d#Z)eed$< d%Z*e"ed&< dZ+e"ed'< dZ,e"ed(< dS ))cudaz9Settings for cuda backend, today this consists of cutlassNarchr)   -O1)-O0r  -O2-O3z-OScompile_opt_levelFTORCHINDUCTOR_CUTLASS_DIRz../third_party/cutlass/cutlass_max_profiling_configs)r"   r<  r   r   %cutlass_max_profiling_swizzle_optionsCUTLASS_EPILOGUE_FUSIONr   r   cuda_cxxr"   cutlass_backend_min_gemm_size/INDUCTOR_CUDA_BACKEND_GENERATE_TEST_RUNNER_CODEgenerate_test_runnerTORCHINDUCTOR_CUTLASS_ALLOWLISTcutlass_op_allowlist_regexTORCHINDUCTOR_CUTLASS_DENYLISTcutlass_op_denylist_regex)TORCHINDUCTOR_CUTLASS_INSTANTIATION_LEVELcutlass_instantiation_levelTORCHINDUCTOR_CUTLASS_PRESETScutlass_presets+TORCHINDUCTOR_CUTLASS_HASH_WITH_COMPILE_CMDcutlass_hash_with_compile_cmd"TORCHINDUCTOR_CUTLASS_PRESCREENINGcutlass_prescreening!TORCHINDUCTOR_CUTLASS_ENABLED_OPSallcutlass_enabled_opsTuse_binary_remote_cacheupload_to_binary_remote_cachebinary_remote_cache_force_write)-r   r   r   r  r  r   r  r   r)   r  r   enable_cuda_ltoenable_ptxas_infoenable_debug_infouse_fast_mathr   r   r   pathabspathjoindirnamer%   __file__cutlass_dirr  r   r  r  cutlass_epilogue_fusion_enabledcutlass_tma_onlyr  r  r  r   r  r  r  r  r  r  r  r  r  r  r   r   r   r   r    sX   
 




r  c                   @   s   e Zd ZU g Zee ed< ddgZee ed< dZe	d ed< dZ
dZd	Zd	ZdZd
Zee ed< ejdZejdddkZeed< d
Zee ed< d
Zee ed< d
Zee ed< dZeed< d
Zeee  ed< dZeed< d
S )rocmr  gfx90agfx942ck_supported_archr  )	r  r  r  r  z-Osz-Ozz-Ominz-Ofastz-Omaxr  FTN	rocm_homeTORCHINDUCTOR_CK_DIR-INDUCTOR_CK_BACKEND_GENERATE_TEST_RUNNER_CODEr   r   r  n_max_profiling_configsck_max_profiling_configsck_tile_max_profiling_configsuse_preselected_instanceskBatch_sweeprA  split_k_threshold)r   r   r   r  r  r  r   r  r  r   is_debug
save_tempsr  flush_denormalsprint_kernel_resource_usager  r   r   r   r   ck_dirr  r   r  r   r  r  r  r  r  r   r   r   r   r    s,   
 
r  )r   rg   halidecpu_backend)rg   r  cuda_backendc                   @   sB   e Zd ZU dZdZdZed ed< dZed ed< dZ	dZ
dZd	S )
r  hostz	host-cudaAnderson2021)r  Li2018	Adams2019Mullapudi2016scheduler_cudar  scheduler_cpuFN)r   r   r   
cpu_target
gpu_targetr  r   r   r  assertsdebugscan_kernelsr   r   r   r   r  N  s   
 r  c                   @   s   e Zd ZU ejdddkZejdddkZdZe	e
 ed< dZdZdZdZdZdZdZejd	ddkZejd
ddkZejddZejddZdZdZe	ee
gdf  ed< ejdddkZdZeed< dS )traceTORCH_COMPILE_DEBUGr   r   TORCH_COMPILE_DEBUG_SAVE_REALN	debug_dirFTINDUCTOR_POST_FUSION_SVGINDUCTOR_ORIG_FX_SVGINDUCTOR_DOT_GRAPH_SHAPE_SVG INDUCTOR_LOG_URL_FOR_GRAPH_XFORM
upload_tarLOG_AUTOTUNE_RESULTS1log_inductor_triton_kernel_to_post_grad_node_info)r   r   r   r   r   r   enabledsave_real_tensorsr  r   r  r   	debug_loginfo_logfx_graphfx_graph_transformedir_pre_fusionir_post_fusionoutput_codegraph_diagramdraw_orig_fx_graphdot_graph_shapelog_url_for_graph_xformcompile_profiler  r   log_autotuning_resultsr  r   r   r   r   r   r  j  s&   
 
r  )
ztrace.upload_tarrY   rZ   r[   zaot_inductor.repro_levelzaot_inductor.dump_aoti_minifierrW   rX   r   r\   _save_config_ignore)r  zcuda.cutlass_dirr   r   rX   rW   r   r\   r   r5   r6   r:   r;   _cache_config_ignore_prefixexternal_matmulc                   @   sV   e Zd ZU dZeed< dZee ed< dZ	dZ
dZee ed< dZee ed< dZdS )test_configsF%force_extern_kernel_in_multi_templateNmax_mm_configsautotune_choice_name_regexautotune_choice_desc_regex)r   r   r   r  r   r   r  r   r   runtime_triton_dtype_assertstatic_cpp_dtype_assertr  r  r  *graphsafe_rng_func_ignores_fallback_randomr   r   r   r   r    s   
 r  )*(  r   r   typingr   r   r   r   r   r   r%   !torch._inductor.custom_graph_passtorch._environmentr   torch.utils._config_moduler	   r
   r   r   r   inplace_paddingcan_inplace_pad_graph_inputr   r   r   r   r   r!   r*   r.   enable_auto_functionalized_v2r  disable_progressverbose_progressr0   r   r   r5   r6   r7   r9   r:   r;   r<   r>   r?   r@   r  r  rA   rD   rF   rH   rI   online_softmaxdcestatic_weight_shapessize_assertsnan_assertsscalar_assertsalignment_assertspick_loop_ordersinplace_buffersallow_buffer_reusememory_planningr  bfloat16_atomic_adds_enabledrV   benchmark_harnessepilogue_fusionprologue_fusionepilogue_fusion_firstpattern_matcherb2b_gemm_passrW   r   custom_graph_passCustomGraphPassTyperX   rY   fxGraphrZ   r[   graphr\   r  r]   split_cat_fx_passes efficient_conv_bn_eval_fx_passesis_predispatchgroup_fusionbatch_fusionr^   r_   reorder_for_localitydynamic_scale_rblockforce_fuse_int_mm_with_muluse_mixed_mmre   ri    reorder_for_compute_comm_overlaprm   rn   reorder_for_peak_memoryestimate_op_runtimeintra_node_bwinter_node_bwrq   max_autotunemax_autotune_pointwisemax_autotune_gemmdisable_decompose_krw   graph_partitionforce_same_precisionuppermax_autotune_gemm_backendsmax_autotune_conv_backendsr~   r   autotune_fallback_to_atenunbacked_symint_fallbacksearch_autotune_cache	save_argsautotune_in_subproc+max_autotune_subproc_result_timeout_seconds-max_autotune_subproc_graceful_timeout_seconds.max_autotune_subproc_terminate_timeout_secondsautotune_multi_devicecoordinate_descent_tuning'coordinate_descent_check_all_directions coordinate_descent_search_radiusr   r   r   r   r   autoheuristic_log_pathr)   hiplayout_opt_defaultlayout_optimizationforce_layout_optimizationkeep_output_stridewarn_mix_layoutrealize_reads_thresholdrealize_opcount_thresholdrealize_acc_reads_thresholdfallback_randomimplicit_fallbacks assume_unaligned_fallback_outputaggressive_fusionr   r   enabled_metric_tablesr   score_fusion_memory_thresholdbenchmark_epilogue_fusion max_epilogue_benchmarked_choicesmax_fusion_size)max_fusion_buffer_group_pairwise_attemptsmax_pointwise_cat_inputsforce_pointwise_catunroll_reductions_thresholdcomment_originconv_1x1_as_mmsplit_reductionsmin_num_splitbenchmark_kernelconstant_and_index_propagationalways_keep_tensor_constantsassert_indirect_indexingcompute_all_boundscombo_kernelsbenchmark_combo_kernelcombo_kernels_autotunecombo_kernel_allow_mixed_sizes#combo_kernel_foreach_dynamic_shapesjoint_graph_constant_foldingdebug_index_assertsemulate_precision_casts__version__is_nightly_or_sourcedeveloper_warnings"optimize_scatter_upon_const_tensorr   r   r   r   r   _fuse_ddp_communication_fuse_ddp_bucket_sizer   r   r   r   r   r   r   r   r   libfb.pyr   __package__get_dir_pathr  r  replacesepr   
ValueErrorImportErrorkernel_name_max_opsshape_paddingcomprehensive_paddingpad_channels_lastdisable_padding_cpupadding_alignment_bytespadding_stride_thresholdpad_outputsbw_outputs_user_visibler   permute_fusionprofiler_mark_wrapper_callgenerate_intermediate_hooksdebug_ir_traceback_raise_error_for_testing_profile_varprofile_bandwidthprofile_bandwidth_regexr   /profile_bandwidth_with_do_bench_using_profilingdisable_cpp_codegenr   r   r   r   r   r   r   enable_linear_binary_foldingr   r   r   rg   rd  r  r  r  r  r  r  r  r  r  Tensorr  torch.utils._config_typingmodulesr   r   r   r   r   <module>   s  
  


 

	%

v Gu CC$
