
    /j                    &   U d dl Z d dlZd dlmZ d dlmZmZmZmZm	Z	m
Z
 d dlZd dlZd dlmZ d dlmZmZmZmZ e	rd dlmZ e j                            dd          dk    Zd	Zd
ee         fdZd
ee         fdZd
ee         fdZd
ee         fdZd
ee         fdZd
efdZ d
efdZ!e j                            dd          dk    Z"d	Z#dZ$d	Z% e            rdndZ& e'e j                            dd                    Z(e'e)d<    edddd          Z*ee)d<   d	Z+ee)d<    e            Z,ee         e)d<    e            Z-ee         e)d<    ed d!d"          Z.ee)d#<   dZ/ee)d$<    e            Z0ee         e)d%<    e            Z1ee         e)d&<    ed'(          Z2ee)d)<   d	Z3ee)d*<   i Z4e5e6e6f         e)d+<   dZ7ee'         e)d,<   d-Z8ed.         e)d/<   e j                            d0d1          dk    Z9ee)d2<   e j                            d3d1          dk    Z:ee)d4<   e j                            d5d1          dk    Z;ee)d6<    e             Z<ee)d7<   e j                            d8d          dk    Z=e j                            d9d          dk    Z>d	Z?dZ@e j                            d:d          dk    ZAe j                            d;          dk    ZBe j                            d<          dk    ZCe j                            d=d          dk    ZDe j                            d> e            rd1nd          dk    ZEdZFdZGdZHe j                            d?d1          dk    ZIe j                            d@          dk    ZJe j                            dAdB          ZKedC         e)dD<   dZLdZM e!            ZNd	ZOdZPd	ZQdZRejS        jT        jU        e)dE<   dZVejS        jT        jU        e)dF<   dZWejS        jT        jX        e)dG<   dZYejS        jT        jU        e)dH<   dZZejS        jT        jU        e)dI<   dZ[eeej\        j]        j^        gdf                  e)dJ<   dZ_eee`dK         ge`dK         f                  e)dL<   dZaeee`dK         ge`dK         f                  e)dM<   dZbd	Zcd	Zdd	ZedZfi Zge5e6e5e6ef         f         e)dN<   i Zhe5e6e5e6ef         f         e)dO<   dZie j                            dPd          dk    Zjd	ZkdZld	dQdRddSZme5e6ef         e)dT<   dUZnedV         e)dW<   d	Zog Zpe`e
e6ee`dK         ge`dK         f         f                  e)dX<   dZqee'         e)dY<   dZrd	Zsd Zte'e)dZ<   d[Zued\         e)d]<   dZveee'ge'f                  e)d^<   d[Zwed_         e)d`<   dZxeee'ge'f                  e)da<   d[Zyed_         e)db<   dZzeee'ge'f                  e)dc<   ddZ{d	Z|ee)de<   dZ}dfZ~dgZ eddhdij          Zee)dk<   e j                            dl          dk    Ze j                            dm          dk    Ze j                            dn          dk    Ze j                            do          dk    Ze j                            dp          dk    Z e' e j        dqdf                    Z e' e j        drds                    Zd
ee'         fdtZ e            Zee'         e)du<   e j                            dvd          dk    Ze j                            dwd1          dk    Ze j                            dxd1          dk    Ze j                            dy e            sdnd1          dk    Zee)dz<   g Ze`e6         e)d{<   g Ze`e6         e)d|<   d	Zee)d}<    ed~dd	"          Zee)d<   g Ze`e'         e)d<   e j                            dd                                          Zd
ee'         fdZ e            Zee'         e)d<   e j                            dd                                          Ze j                            dd                                          Zed         e)d<   e j                            dd                                          Zed         e)d<   d	Zee)d<   d	Zee)d<   dZee)d<   dZee)d<   dZee)d<   dZee)d<   e j                            dd1          dk    Zee)d<   d	ZdZd	Ze j                            d          dk    Ze j                            d          dk    ZdZdZdZe j                            d          dk    Z e'e j                            dd                    Z ee j                            dd                    Ze j                            d          dk    Ze j                            d          dk    Z e'e j                            dd                    Ze j                            dd          Ze j                            dd          Ze j                            dd1          dk    Zde6d
efdZde6d
efdZde6d
efdZe j                            dd          Zej        j        sdnd1Ze j                            de          dk    Ze j                            dd1          dk    Ze j                            dd          dk    Ze j                            d          dk    ZdZdZdZdZee'         e)d<   d	Zd	ZdZe j                            d          dk    ZdZeeg df                  e)d<   d	Ze j                            d          dk    Zee)d<   e j                            d          dk    Zee)d<   e j                            dd          Ze j                            d e            rd1nd          dk    Zee)d<   dZee)d<   dZe j                            dd          dk    ZdRZdZdZdZee'         e)d<   dZd	ZdZd	Zd	Z e j        dd          dk    Z e j        dŦ          dk    Z e'e j                            dd                     Ze j                            dd1          dk    ZdZd	ZdZd	Zd	Zd	ZdRZdRZdZdZd	Zd	ZdZd	Zdej        v pdej        v Z e            peZe j                            dd          dk    ZdZee6         e)d<   dZee6         e)d<    eddϦ          Ze6e)d<   d
e6fdфZ e            Ze6e)d<   dZe'e)d<    eddd"          Zee)d<    edd	Ϧ          Zee)d<   d	ZdfZddgZe`e
ed         e6f                  e)d<   d	Zee)d<   e j                            dd1          dk    ZdZe'e)d<   dZe'e)d<   dZee)d<    G d d          Z G d d          Z d
efdZd
e'fdZ e            rdn
 e            Zee'         e)d<    eddd"          Zee)d<    ed          Ze'e)d<    e             Zee)d<    ed(          Zee)d<    eddd	"          Zee)d<   e j                            dd1          dk    Z	ee)d<    ed(          Z
ee)d<   ee6         e)d<    e            r	 d dlmZ erL ej        e j                            e                    de j                  d                     Zn ej        d           Zn# eef$ r dZY nw xY wdZdZe j                            dd          dk    Ze j                            dd          dk    Zd	Zd	ZdZd	ZdZdZd	ZdZ d	Z!ee)d<   e j                            dd1          dk    Z"d	Z#d	Z$d	Z%d	Z& e             Z'ee)d<   e j                            dd          Z(e(dk    Z)e(dk    rdne(Z*e j                            dd          Z+ee6         e)d	<   e j                            d
          dk    Z,d	Z-e j                            dd1          dk    Z.ee)d<   d	Z/ee)d<   d	Z0ee)d<   d	Z1ee)d<   d	Z2ee)d<   d	Z3ee)d<   d	Z4ee)d<   d	Z5ee)d<   d	Z6ee)d<   e j                            dd1          dk    Z7e j                            dd1          dk    Z8ee)d<   dZ9ee)d<   i Z:e5e6e5e6ef         f         e)d<    e'e j                            dd                    Z;e'e)d<   d	Z<ee)d<   d
ee6         fdZ= eddϦ          Z>e6e)d <    G d! d"          Z? G d# d$          Z@ G d% d&          ZA G d' d(          ZB G d) d*          ZC G d+ d,          ZD eeD           G d- d.eD                      ZE eeD           G d/ d0eD                      ZF G d1 d2          ZGd$ZHed3         e)d4<   d&ZIed5         e)d6<   d7ZJed7         e)d8<   d&ZKed&         e)d9<    G d: d;          ZL G d< d=          ZMg d>ZNe`e6         e)d?<   g d@ZOe`e6         e)dA<   g ZPe`eejQ        ejQ        ejQ        gdf                  e)dB<    e j        dCd          dk    ZR G dD dE          ZS G dF dG          ZTe	rd dHlUT  G dI dJ          ZVe j                            dKd1          dk    ZWee)dL<    eejX        eY                    dS (M      N)Callable)AnycastLiteralOptionalTYPE_CHECKINGUnion)	is_fbcode)Configget_tristate_envinherit_fields_frominstall_config_module)InductorChoicesTORCHINDUCTOR_INPLACE_PADDING1Freturnc                       t          d          S )N#TORCHINDUCTOR_FX_GRAPH_REMOTE_CACHEr        [/home/longshao/multi-rider-rag/.venv/lib/python3.11/site-packages/torch/_inductor/config.pyfx_graph_remote_cache_defaultr          ABBBr   c                      t           j                            d          dk    rdS t           j                            d          dk    rdS d S )NTORCHINDUCTOR_VEC_ISA_OKr   T0F)osenvirongetr   r   r   vec_isa_ok_defaultr!      sE    	z~~011S88t	z~~011S88u4r   c                       t          d          S )N#TORCHINDUCTOR_AUTOTUNE_REMOTE_CACHEr   r   r   r   autotune_remote_cache_defaultr$   $   r   r   c                       t          d          S )N+TORCHINDUCTOR_BUNDLED_AUTOTUNE_REMOTE_CACHEr   r   r   r   %bundled_autotune_remote_cache_defaultr'   (   s    IJJJr   c                  B    t          dt                      sdnd           S )N/TORCHINDUCTOR_BUNDLE_TRITON_INTO_FX_GRAPH_CACHET)r   r
   r   r   r   )bundle_triton_into_fx_graph_cache_defaultr*   ,   s'    9KK)T  r   c                      d} dt           j        v r#t           j                            d          dk    S t                      r%t          j                            d          }|| k    S dS )N   &TORCHINDUCTOR_USE_STATIC_CUDA_LAUNCHERr   z-pytorch/inductor:static_cuda_launcher_versionTr   r   r    r
   torch_utils_internaljustknobs_getval_int)STATIC_CUDA_LAUNCHER_VERSIONversions     r   static_cuda_launcher_defaultr4   3   sk    #$ /2:==z~~FGG3NN	 '<<;
 
 666 tr   c                      d} dt           j        v r#t           j                            d          dk    S t                      r'd}t          j                            |          }|| k    S dS )Nr   TORCHINDUCTOR_PROLOGUE_FUSIONr   z(pytorch/inductor:prologue_fusion_versionTr.   )ENABLE_PROLOGUE_FUSION_VERSIONjk_namer3   s      r   prologue_fusion_enabledr9   B   sg    %&"&"*44z~~=>>#EE	 <'<<WEE888tr   "TORCHDYNAMO_AUTO_FUNCTIONALIZED_V2T-/logs/dedicated_log_torch_compile_worker_rank,TORCHINDUCTOR_PRECOMPILATION_TIMEOUT_SECONDSi,  precompilation_timeout_secondsz0pytorch/remote_cache:enable_local_fx_graph_cache$TORCHINDUCTOR_FX_GRAPH_CACHE_DEFAULTTORCHINDUCTOR_FX_GRAPH_CACHE)justknobenv_name_defaultenv_name_forcedefaultfx_graph_cacheremote_gemm_autotune_cachefx_graph_remote_cache!bundle_triton_into_fx_graph_cachez>pytorch/remote_cache:enable_non_blocking_remote_cache_write_v2-TORCHINDUCTOR_NON_BLOCKING_REMOTE_CACHE_WRITE)r@   rB   rC   non_blocking_remote_cache_writeautotune_local_cacheautotune_remote_cachebundled_autotune_remote_cachez*torch.compiler.config.force_disable_caches)aliasforce_disable_caches&unsafe_skip_cache_dynamic_shape_guards!unsafe_marked_cacheable_functionssleep_sec_TESTING_ONLYneeds_fixed_stride_order)rR   flexible_layout'triton_kernel_default_layout_constraintTORCHINDUCTOR_CPP_WRAPPERr   cpp_wrapper(TORCHINDUCTOR_CPP_WRAPPER_BUILD_SEPARATEcpp_wrapper_build_separateTORCHINDUCTOR_FX_WRAPPER
fx_wrappercpp_cache_precompile_headersTORCHINDUCTOR_ONLINE_SOFTMAX$TORCHINDUCTOR_APPLY_GUMBEL_MAX_TRICKTORCHINDUCTOR_SIZE_ASSERTSTORCHINDUCTOR_NAN_ASSERTS(TORCHINDUCTOR_RUNTIME_TRITON_NAN_ASSERTSTORCHINDUCTOR_SCALAR_ASSERTSTORCHINDUCTOR_ALIGNMENT_ASSERTSTORCHINDUCTOR_MEMORY_PLANNINGTORCHINDUCTOR_USE_FAST_MATHTORCHINDUCTOR_MEMORY_POOLintermediates)nonerf   outputscombinedmemory_poolpost_grad_custom_pre_passpost_grad_custom_post_passcustom_partitioner_fnjoint_custom_pre_passjoint_custom_post_passpre_grad_custom_passz+torch._inductor.scheduler.BaseSchedulerNode_pre_fusion_custom_pass_post_fusion_custom_passpre_grad_fusion_optionspost_grad_fusion_options"TORCHINDUCTOR_DYNAMIC_SCALE_RBLOCKg-C6?   )pre_grad	precisionnum_iterationsrequires_optimizerfx_passes_numeric_check	heuristic)rC   tritonatenr|   mixed_mm_choice'reorder_for_compute_comm_overlap_passesreorder_prefetch_limit&size_threshold_for_succ_based_strategyrg   )rg   all	only_fsdpbucket_all_gathers_fx.bucket_all_gathers_fx_bucket_size_determinatorrg   r   bucket_reduce_scatters_fx2bucket_reduce_scatters_fx_bucket_size_determinatorbucket_all_reduces_fx.bucket_all_reduces_fx_bucket_size_determinatorrC   !runtime_estimations_mms_benchmark   g      I@*TORCHINDUCTOR_USE_EXPERIMENTAL_BENCHMARKERz-pytorch/inductor:use_experimental_benchmarker)rC   rB   r@   use_experimental_benchmarker+TORCHINDUCTOR_DISTRIBUTED_MAX_AUTOTUNE_GEMM&TORCHINDUCTOR_PIPELINE_GEMM_AUTOTUNINGTORCHINDUCTOR_MAX_AUTOTUNE$TORCHINDUCTOR_MAX_AUTOTUNE_POINTWISETORCHINDUCTOR_MAX_AUTOTUNE_GEMM%TORCHINDUCTOR_DEFAULT_AUTOTUNE_WARMUP"TORCHINDUCTOR_DEFAULT_AUTOTUNE_REPd   c                      t           j                            d          } | dS |                                 dv rd S t	          |           S )N,TORCHINDUCTOR_AUTOTUNE_NUM_CHOICES_DISPLAYED
   r   r   r   r    lowerintenv_vals    r   '_autotune_num_choices_displayed_defaultr     sD    jnnKLLGr}}/))tw<<r   autotune_num_choices_displayed/TORCHINDUCTOR_MAX_AUTOTUNE_REPORT_CHOICES_STATS<TORCHINDUCTOR_MAX_AUTOTUNE_PRUNE_CHOICES_BASED_ON_SHARED_MEM-TORCHINDUCTOR_TRITON_DISABLE_DEVICE_DETECTIONTORCHINDUCTOR_GRAPH_PARTITIONgraph_partitioncustom_should_partition_opscudagraph_unsafe_unbacked_ops#max_autotune_allow_flexible_layoutsz%pytorch/compiler:force_same_precision"TORCHINDUCTOR_FORCE_SAME_PRECISIONforce_same_precisionmulti_kernel_hints(TORCHINDUCTOR_MAX_AUTOTUNE_GEMM_BACKENDSzATEN,TRITON,CPPc                      t           j                            dd          } |                                 dv rd S t	          |           S )N*TORCHINDUCTOR_NVGEMM_MAX_PROFILING_CONFIGS5r   r   r   s    r   %_nvgemm_max_profiling_configs_defaultr   K  s<    jnnI3OOG}}/))tw<<r   nvgemm_max_profiling_configs(TORCHINDUCTOR_MAX_AUTOTUNE_CONV_BACKENDSzATEN,TRITON,TORCHINDUCTOR_MAX_AUTOTUNE_GEMM_SEARCH_SPACEDEFAULT)r   
EXHAUSTIVEmax_autotune_gemm_search_space,TORCHINDUCTOR_MAX_AUTOTUNE_FLEX_SEARCH_SPACEmax_autotune_flex_search_spacefallback_by_defaultselective_decomposeuse_dceuse_pre_grad_passesuse_joint_graph_passesuse_post_grad_passesCUTEDSL_ENABLE_AUTOTUNINGcutedsl_enable_autotuningi    TORCHINDUCTOR_SAVE_ARGS!TORCHINDUCTOR_AUTOTUNE_IN_SUBPROCg      N@g        #TORCHINDUCTOR_AUTOTUNE_MULTI_DEVICE(TORCHINDUCTOR_COLLECTIVE_BENCHMARK_NRUNS50*TORCHINDUCTOR_COLLECTIVE_BENCHMARK_TIMEOUT30'TORCHINDUCTOR_COORDINATE_DESCENT_TUNING5TORCHINDUCTOR_COORDINATE_DESCENT_CHECK_ALL_DIRECTIONS'TORCHINDUCTOR_COORDINATE_DESCENT_RADIUS#TORCHINDUCTOR_AUTOHEURISTIC_COLLECT TORCHINDUCTOR_AUTOHEURISTIC_USEmixed_mm'TORCHINDUCTOR_RUN_JIT_POST_COMPILE_HOOKnamec                 >    t          |           pt          |           S N)collect_autoheuristicuse_autoheuristicr   s    r   run_autoheuristicr     s     &&A*;D*A*AAr   c                 X    | t           j        j        j                            d          v S N,)r/   	_inductorconfigautoheuristic_collectsplitr   s    r   r   r     s#    5?)?EEcJJJJr   c                 X    | t           j        j        j                            d          v S r   )r/   r   r   autoheuristic_user   r   s    r   r   r     s#    5?);AA#FFFFr   $TORCHINDUCTOR_AUTOHEURISTIC_LOG_PATH!TORCHINDUCTOR_LAYOUT_OPTIMIZATIONTORCHINDUCTOR_FORCE_LAYOUT_OPT TORCHINDUCTOR_KEEP_OUTPUT_STRIDETORCHINDUCTOR_WARN_MIX_LAYOUT          realize_acc_reads_size_threshold.TORCHINDUCTOR_ASSUME_UNALIGNED_FALLBACK_OUTPUTr   inductor_choices_classTORCHINDUCTOR_DEBUG_FUSIONdebug_fusionTORCHINDUCTOR_BENCHMARK_FUSIONbenchmark_fusion#TORCHINDUCTOR_ENABLED_METRIC_TABLES(TORCHINDUCTOR_LOOP_ORDERING_AFTER_FUSIONloop_ordering_after_fusionloop_index_inversion_in_fusionr   'TORCHINDUCTOR_BENCHMARK_EPILOGUE_FUSION@   max_fusion_unique_io_buffersTORCHINDUCTOR_SPLIT_REDUCTIONSTORCHINDUCTOR_DETERMINISTICTORCHINDUCTOR_MIN_NUM_SPLITTORCHINDUCTOR_BENCHMARK_KERNEL   devgit0TORCHINDUCTOR_OPTIMIZE_SCATTER_UPON_CONST_TENSORadd_pre_grad_passesremove_pre_grad_passesTORCHINDUCTOR_DISABLED_PASSESrB   rC   disabled_passesc                  n    dt           j        v rt           j        d         } nd} | dv sJ d|              | S )NTORCHINDUCTOR_WORKER_START
subprocess)r  forkspawnzInvalid start method: )r   r   )start_methods    r   decide_worker_start_methodr
    s^    #rz11z">?#     /..	  
 r   worker_start_methodi   small_memory_access_thresholdz(pytorch/compiler:worker_suppress_logging%TORCHINDUCTOR_WORKER_SUPPRESS_LOGGINGworker_suppress_loggingLOG_TLPARSElog_tlparsefuse_ddp_with_concat_opschedule_comm_wait).N_fuse_ddp_communication_passes_micro_pipeline_tp)TORCHINDUCTOR_PARTITIONED_SCATTER_ENABLEDr,   "partitioned_scatter_min_partitions   "partitioned_scatter_max_partitionsg?!partitioned_scatter_memory_budgetc                   ,    e Zd ZU dZeed<   dZeed<   dS )_collectiveFauto_selecti   #one_shot_all_reduce_threshold_bytesN)__name__
__module____qualname__r  bool__annotations__r  r   r   r   r   r  r    s3         K/9'99999r   r  c                      e Zd ZU dZdZeed<   dZee         ed<   dZ	ee         ed<   dZ
ee         ed<   dZee         ed<   dZeeej        j        gee         f                  ed	<   d
Zed         ed<   dZee         ed<   dZee         ed<   dZee         ed<   dZee         ed<   dZeed<   dZeed<   dZee         ed<   dZeed<   dS )aten_distributed_optimizationszDConfiguration for distributed optimization passes on ATen FX graphs.Fenable_overlap_schedulingNcollective_bucketinginsert_overlap_depsmax_compute_pre_fetchcompute_overlap_multiplercustom_runtime_estimation
analytical)r+  	benchmarkcollective_estimatormax_memory_increase_gbmax_memory_increase_ratiomax_in_flight_gbmax_coll_distance!log_final_collectives_estimationsTbucket_exposed_firstenable_fusion_regions&prioritize_bucketing_during_scheduling)r  r  r   __doc__r%  r!  r"  r&  r   r'  r(  r   r)  floatr*  r   r/   fxNoder-  r   r.  r/  r0  r1  r2  r3  r4  r5  r   r   r   r$  r$    s|        NN ',t+++ ,0(4./// +/$... ,08C=///15x555 	 x%(-(5/1Q(RS    @L'";<KKK /3HUO22215x555 )-huo,,, (,x}+++.3%t333 "&$%%%
 -18D>000 48*D77777r   r$  c                  T    d} d}t           j                            |          }| |k    S )a   
    TODO: Remove when parallel compiled is fully enabled internally. For rollout, use a
    knob to enable / disable. The justknob should not be performed at import, however.
    So for fbcode, we assign compile_threads to 'None' below and initialize lazily in
    async_compile.py.
    rv   z0pytorch/inductor:enable_parallel_compile_version)r/   r0   r1   )ENABLE_PARALLEL_COMPILE_VERSIONr8   r3   s      r   #parallel_compile_enabled_internallyr<  -  s0     '(#@G#88AAG*g55r   c                  h   ddl } |                     t                    }dt          j        v r6t          t          j        d                   }|                    d|           nt          j        dk    rd}|                    d           nt                      r&t                      sd}|                    d           nst          t          d	          r!t          t          j        d                    nt          j                    }|sJ t          d
|          }|                    d|           |S )a!  
    Here are the precedence to decide compile_threads
    1. User can override it by TORCHINDUCTOR_COMPILE_THREADS.  One may want to disable async compiling by
       setting this to 1 to make pdb happy.
    2. Set to 1 if it's win32 platform
    3. decide by the number of CPU cores
    r   NTORCHINDUCTOR_COMPILE_THREADSz!compile_threads set to %d via envwin32rv   z"compile_threads set to 1 for win32z"compile_threads set to 1 in fbcodesched_getaffinity    zcompile_threads set to %d)logging	getLoggerr  r   r   r   infosysplatformr
   r<  hasattrlenr@  	cpu_countmin)rB  logcompile_threadsrI  s       r   decide_compile_threadsrM  ;  s+    NNN 

H
%
%C&"*44bj)HIJJ4oFFFF		 	 56666	 ?@BB ?56666 r.// C$Q''((( 	
 yb),,,o>>>r   rL  z+pytorch/inductor:quiesce_async_compile_pool(TORCHINDUCTOR_QUIESCE_ASYNC_COMPILE_POOLquiesce_async_compile_pool<   )rC   quiesce_async_compile_timeuse_static_cuda_launcherz/torch._inductor.config.use_static_cuda_launcheruse_static_triton_launcherz:pytorch/inductor:static_launch_user_defined_triton_kernels7TORCHINDUCTOR_STATIC_LAUNCH_USER_DEFINED_TRITON_KERNELS)static_launch_user_defined_triton_kernels)TORCHINDUCTOR_STRICT_STATIC_CUDA_LAUNCHERstrict_static_cuda_launcherz2torch._inductor.config.strict_static_cuda_launcherstrict_static_triton_launcherglobal_cache_dir)parutil.zfb/cacheTORCHINDUCTOR_SHAPE_PADDING#TORCHINDUCTOR_COMPREHENSIVE_PADDINGi   force_shape_padTORCHINDUCTOR_PERMUTE_FUSION_use_fp64_for_unbacked_floatsTORCHINDUCTOR_PROFILETORCHINDUCTOR_PROFILE_OUTPUTprofile_bandwidth_output3TORCHINDUCTOR_PROFILE_WITH_DO_BENCH_USING_PROFILINGTORCHINDUCTOR_FREEZINGfreezingfreezing_discard_parametersdecompose_mem_bound_mmwrap_inductor_compiled_regionsassume_aligned_inputsassume_32bit_indexing.unsafe_ignore_unsupported_triton_autotune_args"check_stack_no_cycles_TESTING_ONLY*always_complex_memory_overlap_TESTING_ONLY*TORCHINDUCTOR_ENABLE_LINEAR_BINARY_FOLDINGTORCHINDUCTOR_ANNOTATE_TRAININGannotate_training)enable_caching_generated_triton_templatesautotune_lookup_tableTORCHINDUCTOR_FILE_LOCK_TIMEOUT600file_lock_timeoutenable_autograd_for_aotc                      d } t                      rGt          j                            dd           }t          j                            dd          }|d| } | S )NMAST_HPC_JOB_NAME	ROLE_RANKr   r;   )r
   r   r   r    )log_locmast_job_nameglobal_ranks      r   get_worker_log_pathr~  ;  sW    G{{ T
':DAAjnn[#66$SkSSGNr   TORCHINDUCTOR_WORKER_LOGPATHtorchinductor_worker_logpathc                       e Zd ZU ej                            d          dk    Zeed<   dZ	e
ed<   dZe
ed<   ej                            d          # e
ej                            d                    nd	Ze
d	z  ed
<   d	S )auto_chunkerTORCHINDUCTOR_AUTO_CHUNKERr   enablei   output_size_thresholdr   amplify_ratio_threshold TORCHINDUCTOR_CHUNKER_NUM_CHUNKSN	num_chunk)r  r  r   r   r   r    r  r!  r"  r  r   r  r  r   r   r   r  r  M  s         :>>">??3FFDFFF "-3,,, $%S$$$ :>><==I 	BJNN=>>??? sTz     r   r  c                      e Zd ZU dZdZej                            dd          dk    Zej                            dd          dk    Z	dZ
ee         ed<    eej                            d	d
                    Zdej                            dej        dk    rdnd          fZedef         ed<   ej                            dd          dk    Zej                            dd          dk    ZdZee         ed<   dZee         ed<    ed          Zee         ed<   dZed         ed<    eej                            dd                    Zej                            dd          dk    Zej                            dd          dk    Zej                            dd          Z ej                            dd          dk    Z!d Z" eej                            d!d                    Z#ej                            d"d          Z$ej                            d#d          Z%d$Z&d Z'ej                            d%d          dk    Z(d Z)ej                            d&d          dk    Z*ej                            d'd          dk    Z+dS )(cppzu
    Settings for cpp backend.
    This class provides a centralized location for managing cpp backend settings.
    $TORCHINDUCTOR_CPP_NO_REDUNDANT_LOOPSr   !TORCHINDUCTOR_CPP_DYNAMIC_THREADSr   Nsimdlen TORCHINDUCTOR_CPP_MIN_CHUNK_SIZE512CXXdarwinzclang++zg++cxx'TORCHINDUCTOR_CPP_ENABLE_KERNEL_PROFILE TORCHINDUCTOR_CPP_WEIGHT_PREPACKinject_relu_bug_TESTING_ONLYinject_log1p_bug_TESTING_ONLYr   
vec_isa_okoriginal_atenr/   r  inductor_nodedescriptive_names,TORCHINDUCTOR_CPP_MAX_HORIZONTAL_FUSION_SIZE16-TORCHINDUCTOR_CPP_FALLBACK_SCATTER_REDUCE_SUM-TORCHINDUCTOR_CPP_ENABLE_UNSAFE_MATH_OPT_FLAG5TORCHINDUCTOR_CPP_ENABLE_FLOATING_POINT_CONTRACT_FLAGoff)TORCHINDUCTOR_CPP_ENABLE_TILING_HEURISTICF#TORCHINDUCTOR_CPP_GEMM_MAX_K_SLICES%TORCHINDUCTOR_CPP_GEMM_CACHE_BLOCKING%TORCHINDUCTOR_CPP_GEMM_THREAD_FACTORST$TORCHINDUCTOR_CPP_USE_DECOMPOSE_TANH%TORCHINDUCTOR_CPP_FORCE_INLINE_KERNEL-TORCHINDUCTOR_CPP_USE_CONSTEXPR_FOR_INT_ARRAY),r  r  r   r6  threadsr   r   r    no_redundant_loopsdynamic_threadsr  r   r   r"  min_chunk_sizerE  rF  r  tuplestrenable_kernel_profileweight_prepackr  r  r   r  r!  r  r   max_horizontal_fusion_sizefallback_scatter_reduce_sumenable_unsafe_math_opt_flag#enable_floating_point_contract_flagenable_tiling_heuristicsenable_grouped_gemm_templategemm_max_k_slicesgemm_cache_blockinggemm_thread_factorsenable_loop_tail_vecenable_concat_linearuse_decompose_tanhuse_small_dequant_bufferforce_inline_kerneluse_constexpr_for_int_arrayr   r   r   r  r  ^  s          G
 	
=sCCsJ  jnn%H#NNRUUO!GXc]!!!S(JERRSSN 	

u3<8+C+CiiOOCtSy	    	
@#FF#M 
 Z^^$FLLPSSN
 37 (3-66637!8C=777 "2!12L!M!MJMMM 	 wHI   
 "%

EtLL" " 	
FLLPSS   	
FLLPSS   +-*..?+ +' 	
BCHHCO 
 $)  BJNN+PRUVVWW *..)PRVWW *..)PRVWW   !
 	
=sCCsJ 
  % 	
>DDK  	
FLLPSS  r   r  c                      e Zd ZU dZej                            d          dk    ZdZdZ	dZ
eeeeeedf         f                           ed<   dZdZdZ e             Zd	Zd
Zee         ed<   dZdZ edd          Zeed<   dZeed<   dZdZdZdZ dZ!ej                            d e            sdnd          dk    Z"eed<   dZ#ee         ed<   dZ$eed<   dZ%dZ&dZ'ee         ed<   dZ(eed<   dZ)eed<    ej*        dd          dk    Z+eed<   dZ,dZ-ej                            dd          dk    Z.ej                            dd          dk    Z/dZ0e1d         ed<   ej                            dd          dk    Z2ej                            d d          dk    Z3dZ4 eej                            d!d                    Z5e1d"         ed#<   ej                            d$d          dk    Z6d%Z7dZ8e9j:        j;        rd&nd'Z<eed(<   dZ=dZ>dZ?dZ@eeA         ed)<   dZBej                            d*d          dk    ZCej                            d+d          dk    ZDej                            d,d          dk    ZEdZF eej                            d-e9j:        j;        rdnd.                    ZG eej                            d/d0                    ZHej                            d1d          dk    ZIej                            d2 e            rdnd          dk    ZJd3ZKdZLee         ed4<   ej                            d5d          dk    ZMdZNej                            d6          dk    ZOej                            d7d          dk    ZPeed8<   i ZQeReAef         ed9<   d:ZSej                            d;d          dk    ZTeed<<   ej                            d=          ZUeeA         ed><   ej                            d?d          dk    ZVeed@<   ej                            dAd          dk    ZWeedB<   ej                            dCd          dk    ZXeedD<   dS )Er}   z.
    Config specific to codegen/triton.py
    TORCHINDUCTOR_CUDAGRAPHSr   TFN.cudagraph_capture_sizesr  r   "cudagraph_dynamic_shape_warn_limit TORCHINDUCTOR_CUDAGRAPH_OR_ERRORr  cudagraph_or_error%reorder_for_reducing_graph_partitions&TORCHINDUCTOR_COALESCE_TILING_ANALYSISr   coalesce_tiling_analysis	max_tilesprefer_nd_tilingautotune_at_compile_timeautotune_with_sample_inputstile_reductionsTORCHINDUCTOR_NATIVE_MATMULnative_matmul!TORCHINDUCTOR_UNIQUE_KERNEL_NAMES&TORCHINDUCTOR_UNIQUE_USER_KERNEL_NAMESr  r  r  #TORCHINDUCTOR_PERSISTENT_REDUCTIONS$TORCHINDUCTOR_COOPERATIVE_REDUCTIONSTORCHINDUCTOR_MULTI_KERNEL)r   rv   r,      multi_kernelTORCHINDUCTOR_DIVISIBLE_BY_16   rA     spill_thresholdr  ENABLE_PERSISTENT_TMA_MATMULENABLE_TEMPLATE_TMA_STORETORCHINDUCTOR_SKIP_L1$TORCHINDUCTOR_NUM_DECOMPOSE_K_SPLITS10#TORCHINDUCTOR_DECOMPOSE_K_THRESHOLD32TORCHINDUCTOR_ENABLE_PDL!TORCHINDUCTOR_MIX_ORDER_REDUCTIONrv   mix_order_reduction_split_size5TORCHINDUCTOR_MIX_ORDER_REDUCTION_AUTOTUNE_SPLIT_SIZE4TORCHINDUCTOR_MIX_ORDER_REDUCTION_ALLOW_MULTI_STAGES"TORCHINDUCTOR_ENABLE_TLX_TEMPLATESenable_tlx_templatesdebug_dump_kernel_inputsr  %TORCHINDUCTOR_TRITON_PROTON_PROFILINGproton_profiling&TORCHINDUCTOR_TRITON_PROTON_OUTPUT_DIRproton_output_dir'TORCHINDUCTOR_TRITON_PROTON_GROUP_BY_SMproton_group_by_sm-TORCHINDUCTOR_TRITON_PROTON_SPLIT_INVOCATIONSproton_split_invocations-TORCHINDUCTOR_TRITON_PROTON_PER_CTA_OCCUPANCYproton_per_cta_occupancy)Yr  r  r   r6  r   r   r    
cudagraphscudagraph_treescudagraph_skip_dynamic_graphsr  r   r  r	   r   r"  slow_path_cudagraph_asserts!cudagraph_trees_history_recordingcudagraph_trees_objgraphr
    cudagraph_support_input_mutation#cudagraph_unexpected_rerecord_limitr  force_cudagraph_syncforce_cudagraphs_warmupr   r  r!  r  fast_path_cudagraph_assertsskip_cudagraph_warmupdebug_sync_graphdebug_sync_kerneldense_indexingr  r  r  autotune_pointwiseautotune_cublasLtr  r  r  getenvr   tiling_prevents_pointwise_fusion tiling_prevents_reduction_fusionunique_kernel_namesunique_user_kernel_namesr  r   persistent_reductionscooperative_reductionsforce_cooperative_reductionsr  divisible_by_16min_split_scan_rblockstore_cubinr/   r3   hipr  use_block_ptruse_tensor_descriptor)transpose_discontiguous_tensor_descriptorr  r  codegen_upcast_to_fp32enable_persistent_tma_matmulenable_template_tma_storeskip_l1_cache.disallow_failing_autotune_kernels_TESTING_ONLYnum_decompose_k_splitsdecompose_k_threshold
enable_pdlmix_order_reduction"mix_order_reduction_initial_xblockr  'mix_order_reduction_autotune_split_size#mix_order_reduction_non_strict_mode&mix_order_reduction_allow_multi_stagesr  r  dictmax_kernel_dump_occurrencesr  r  r  r  r  r   r   r   r}   r}     s\         
  :;;sBJ O %*! MQXeE#uS#X2F,G&HIPPP #' ).%  % ,59;;$ +.' 9:&999 ! $  &v9         37)4666 #( "   N 	
46UccRU	
 	
 	 d     $Ix}### #d"""   04htn333
 ).--- "OT!!!$ $")$A3GG3NM4NNN (,$'+$ 	
:C@@CG  	
?EEL  	 wHI    	
<cBBcI  	
=sCCsJ 
 $)  ),

3S99) )L'*%   
 jnn%DcJJcQO   K "'!2:22OS::: M "
 15-
 37 (3-666 "
 	
5s;;sB !
 !#
/JC P PTW WJNN#:C@@CGM 6;2 !S

25=;L4VCCRV	
 	
   C

<dCC   :C@@CGJ 	
:99;;<WCCTWXX	  *+&48"HSM888

NPSTT	 , +0' 	
MNNRUU +
 	
;SAASH $    02d38n111
 #$ 	
>DDK d    (*z~~0( (x}   
 	
@#FF#M    
 	
FLLPSS d    	
FLLPSS d     r   r}   c                      e Zd ZU dZdZej                            dd          dk    Zej                            dd          dk    Z	ej                            dd          Z
ej                            d	d          Zed
         ed<   ej                            dd          ZdZdZdZeed<   dZeed<   dZdZeed<   dZee         ed<   dZeed<   i Zeeef         ed<   ej                            dd          dk    Zeed<   dZeed<   ej                            dd          dk    Zeed<    eej                            dd                    Z eed<   i Z!eee"f         ed<   dZ#eed<   dZ$eed <   ej                            d!d          dk    Z%eed"<   dZ&eed#<   dZ'ee         ed$<    e(             Z)eed%<   dZ*ee         ed&<   dZ+ee         ed'<   dZ,ee         ed(<   i Z-ee.j/        j0        e1e         f         ed)<   dZ2ee1e                  ed*<   ej                            d+d          dk    Z3dZ4eed,<   dZ5ee         ed-<   dZ6eee1e         z           ed.<   dZ7ee         ed/<   dS )0aot_inductorz9
    Settings for Ahead-Of-Time Inductor Compilation
    r   AOT_INDUCTOR_DEBUG_COMPILEr   r   AOT_INDUCTOR_DEBUG_SYMBOLS&AOT_INDUCTOR_COMPILE_WRAPPER_OPT_LEVELO1-AOT_INDUCTOR_DEBUG_INTERMEDIATE_VALUE_PRINTER)r   r   23 debug_intermediate_value_printer&AOT_INDUCTOR_FILTERED_KERNELS_TO_PRINTNFuse_runtime_constant_foldingforce_mmap_weightsTpackagepackage_cpp_onlydynamic_linkagemetadata/AOTINDUCTOR_RAISE_ERROR_ON_IGNORED_OPTIMIZATION#raise_error_on_ignored_optimizationcheck_lowerboundDUMP_AOTI_MINIFIERdump_aoti_minifierAOTINDUCTOR_REPRO_LEVELr,   repro_levelpresetsallow_stack_allocationuse_minimal_arrayref_interface)AOT_INDUCTOR_WEIGHT_USE_CACHING_ALLOCATORweight_use_caching_allocatorpackage_constants_in_so package_constants_on_disk_formatprecompile_headersembed_kernel_binaryemit_multi_arch_kernelmodel_name_for_generated_filescustom_ops_to_c_shimscustom_op_libsAOT_INDUCTOR_ENABLE_LTOlink_libtorchcross_target_platformaoti_shim_libraryaoti_shim_library_path)8r  r  r   r6  output_pathr   r   r    debug_compiledebug_symbolscompile_wrapper_opt_levelr-  r   r"  filtered_kernel_namesserialized_in_specserialized_out_specr/  r!  r0  use_consts_asm_buildr1  r2  r   r3  r4  r"  r  r6  r7  r9  r   r;  r<  r   r=  r>  r@  rA  rB  r
   rC  rD  rE  rF  rG  r/   _ops
OpOverloadlistrH  
enable_ltorJ  rK  rL  rM  r   r   r   r%  r%  ,  s          KJNN#?EELMJNN#?EELM !#
0$! ! EGJNN7E E$g.@&A   
 JNN0$    */ $...  %$$$  GT'+htn+++ !OT     "Hd38n!!! 	
H#NNRUU (    "d!!!  "z~~.BCHHCOOOO s2:>>*CQGGHHKHHH !GT#s(^    $)D((( ,1"D000 	
BCHHCO !$    %)T((( 7;$hsm::: $-9;;... +/$... .2HTN111 59"HSM888 EG4
 5tCy @AFFF*.NHT#Y'...  93??3FJ M4 ,08C=/// 48xd3i0777,0HSM00000r   r%  c                       e Zd ZU dZeed<   dS )aot_inductor_modeFcompile_standaloneN)r  r  r   r\  r!  r"  r   r   r   r[  r[    s%           %$$$$$r   r[  c                   v   e Zd ZU dZdZed         ed<   dZdZe	j
                            e	j                            de	j
                            e	j
                            ej                  d                              ZdZee         ed	<   g d
Zee         ed<    eeeeef          ed e	j                            dd                              d          D                                 Zeeeef         ed<    eeeeef          ed e	j                            dd                    d eD                                                     d          D                                 Zeeeef         ed<   e	j                            dd          dk    ZdZdZeed<   e	j                            dd          dk    Z e!ed<   e	j                            d          Z"ee#         ed<   e	j                            d          Z$ee#         ed<   e	j                            d d          Z%e#ed!<   e	j                            d"d          dk    Z&e!ed#<   e	j                            d$d          dk    Z'e!ed%<   e	j                            d&d'          Z(e#ed(<   d)Z)e!ed*<   dZ*e!ed+<   dZ+e!ed,<   d)Z,e!ed-<   dS ).cutlassz-
    Config specific to cutlass backend.
    -O1)-O0r_  -O2-O3z-OScompile_opt_levelFTORCHINDUCTOR_CUTLASS_DIRz../third_party/cutlass/Ncutlass_max_profiling_configs)rv   r,   r   r   %cutlass_max_profiling_swizzle_optionsc              #   4   K   | ]}t          |          V  d S r   r   .0xs     r   	<genexpr>zcutlass.<genexpr>  s<       
 
 FF
 
 
 
 
 
r   +TORCHINDUCTOR_CUTLASS_DYNAMIC_CLUSTER_SHAPEz2,1,1r   cutlass_dynamic_cluster_shapec              #   4   K   | ]}t          |          V  d S r   rh  ri  s     r   rl  zcutlass.<genexpr>  s<       
 
 FF
 
 
 
 
 
r   .TORCHINDUCTOR_CUTLASS_DYNAMIC_CLUSTER_FALLBACKc              #   4   K   | ]}t          |          V  d S r   )r  )rj  vs     r   rl  zcutlass.<genexpr>  s(      GGAQGGGGGGr    cutlass_dynamic_cluster_fallbackCUTLASS_EPILOGUE_FUSIONr   r   rv   cutlass_backend_min_gemm_size/INDUCTOR_CUDA_BACKEND_GENERATE_TEST_RUNNER_CODEgenerate_test_runnerTORCHINDUCTOR_CUTLASS_ALLOWLISTcutlass_op_allowlist_regexTORCHINDUCTOR_CUTLASS_DENYLISTcutlass_op_denylist_regex)TORCHINDUCTOR_CUTLASS_INSTANTIATION_LEVELcutlass_instantiation_level+TORCHINDUCTOR_CUTLASS_HASH_WITH_COMPILE_CMDcutlass_hash_with_compile_cmd"TORCHINDUCTOR_CUTLASS_PRESCREENINGcutlass_prescreening!TORCHINDUCTOR_CUTLASS_ENABLED_OPSr   cutlass_enabled_opsTuse_binary_remote_cacheupload_to_binary_remote_cachebinary_remote_cache_force_writeenable_caching_codegen)-r  r  r   r6  rc  r   r"  enable_debug_infouse_fast_mathr   pathrealpathr   r    joindirnamer/   __file__cutlass_dirre  r   r   rf  rX  r   r  r   rn  rs  cutlass_epilogue_fusion_enabledcutlass_tma_onlyru  rw  r!  ry  r  r{  r}  r  r  r  r  r  r  r  r   r   r   r^  r^    s          EJw@AIII  M '""

'GLL//) 	
 	
 K 48!8C=777 8D||)49CCC:>$c3m 
 
Z^^=w eCjj	
 
 
 	
 	
; ;!5c3#7    >BTc3m 
 
Z^^@GG)FGGGGG  eCjj
 
 
 	
 	
	> 	>$eCcM&: 	 	 	 	
0#66#= $
  *+!3***
 	
H#NNRUU $    13
)1 1    02z~~(0 0x}    (*z~~3S( (    	
DcJJcQ "4    	
;SAASH $     "z~~+U      
 %)T((( +0!4/// -2#T111 $(D'''''r   r^  c                       e Zd ZU dZee         ed<   dZee         ed<   dZee         ed<   dZ	dZ
dZee         ed<   dS )cudaNarchr3   cuda_cxxF   r   )r  r  r   r  r   r  r"  r3   r  enable_cuda_ltoenable_ptxas_infor   r   r   r   r   r  r  l  sz         
 D(3-
 "GXc]!!! #Hhsm""" O  34 (3-33333r   r  c                   D    e Zd ZU dZee         ed<   dZee         ed<   dS )xpuNr  r3   )r  r  r   r  r   r  r"  r3   r   r   r   r  r    s>          D(3- "GXc]!!!!!r   r  c                      e Zd ZU g Zee         ed<   g dZeed                  ed<   dZ	ed         ed<   dZ
dZdZdZdZd	Zee         ed
<   ej                            d          Zej                            dd          dk    Zeed<   d	Zee         ed<   d	Zee         ed<   d	Zee         ed<   dZeed<   d	Zeee                  ed<   dZeed<   dZeed<   d	S )rocmr  )gfx90agfx942gfx950ck_supported_archra  )	r`  r_  ra  rb  z-Osz-Ozz-Ominz-Ofastz-Omaxrc  FTN	rocm_homeTORCHINDUCTOR_CK_DIR-INDUCTOR_CK_BACKEND_GENERATE_TEST_RUNNER_CODEr   r   rw  n_max_profiling_configsck_max_profiling_configsck_tile_max_profiling_configsuse_preselected_instanceskBatch_sweepr  split_k_thresholdcontiguous_threshold)r  r  r   r  rX  r  r"  r  r   rc  is_debug
save_tempsr  flush_denormalsprint_kernel_resource_usager  r   r   r   r    ck_dirrw  r!  r  r   r  r  r  r  r  r  r   r   r   r  r    s         D$s)F F FtG$@AB    	 wL   
 H J M O #(  $Ix}### Z^^233F 	
FLLPSS $   
 .2Xc]111 /3hsm222 48!8C=777 ',t+++ )-L(49%,,,  s !##"""""r   r  )r  r}   halidepallascpu_backend)r}   r  r  cuda_backendr  tpu_backendxpu_backendc                   X    e Zd ZU dZdZdZed         ed<   dZed         ed<   dZ	dZ
dZd	S )
r  hostz	host-cudaAnderson2021)r  Li2018	Adams2019Mullapudi2016scheduler_cudar  scheduler_cpuFN)r  r  r   
cpu_target
gpu_targetr  r   r"  r  assertsdebugscan_kernelsr   r   r   r  r    ss         J J
 	 GRS    	 7QR   
 G E LLLr   r  c            	          e Zd ZU ej                            dd          dk    Zej                            dd          dk    ZdZe	e
         ed<   dZdZdZdZdZdZdZej                            d	d          dk    Zej                            d
d          dk    Zej                            dd          Zej                            dd          ZdZdZe	ee
gdf                  ed<   ej                            dd          dk    Z eej                            dej                            dd                              Zeed<   dS )traceTORCH_COMPILE_DEBUGr   r   TORCH_COMPILE_DEBUG_SAVE_REALN	debug_dirFTINDUCTOR_POST_FUSION_SVGINDUCTOR_ORIG_FX_SVGINDUCTOR_DOT_GRAPH_SHAPE_SVG INDUCTOR_LOG_URL_FOR_GRAPH_XFORM
upload_tarLOG_AUTOTUNE_RESULTSINDUCTOR_PROVENANCEprovenance_tracking_level)r  r  r   r   r   r    enabledsave_real_tensorsr  r   r  r"  	debug_loginfo_logfx_graphfx_graph_transformedir_pre_fusionir_post_fusionoutput_codegraph_diagramdraw_orig_fx_graphdot_graph_shapelog_url_for_graph_xformcompile_profiler  r   log_autotuning_resultsr   r  r   r   r   r  r  	  s        jnn2C88C?G 
'FLLPSS  $Ix}### I H H   M N K JNN#=sCCsJM (>DDK jnn%CTJJO !jnn-OQUVV O 37J3%+./666Z^^,BCHHCO &)S

!2:>>2G#M#M	
 	
& &s     r   r  )
ztrace.upload_tarrn   ro   rp   zaot_inductor.repro_levelzaot_inductor.dump_aoti_minifierrk   rl   r  rq   _save_config_ignore)r  zcuda.cutlass_dirzcutlass.cutlass_dirzxpu.cutlass_dirr  rL  rl   rk   rn   ro   r  rq   rn  rD   rF   rJ   rK   _cache_config_ignore_prefixexternal_matmul8TORCHINDUCTOR_WRITE_ARE_DETERMINISTIC_ALGORITHMS_ENABLEDc                   d    e Zd ZU dZeeeeeeef                  f                  e	d<   dZ
ee	d<   dS )lookup_tableNtableTcheck_src_hash)r  r  r   r  r   r"  r  rX  r   r"  r  r!  r   r   r   r  r  	  sP         7;E8Dd4S>2234;;;  NDr   r  c                   f   e Zd ZU dZeed<   dZee         ed<   dZ	dZ
dZej                            d          Zee         ed<   ej                            d          Zee         ed<   dZdZeed	                  ed
<   dZdZeed<    ej        d          dk    Z ej        dd          ZdZdZdS )test_configsF%force_extern_kernel_in_multi_templateNmax_mm_configs(TORCHINDUCTOR_AUTOTUNE_CHOICE_NAME_REGEXautotune_choice_name_regex(TORCHINDUCTOR_AUTOTUNE_CHOICE_DESC_REGEXautotune_choice_desc_regex)assertrK  track_memory_lifecycleT assume_bucketing_reduces_latency,TORCHINDUCTOR_FORCE_FILTER_REDUCTION_CONFIGSr   )TORCHINDUCTOR_DISTORT_BENCHMARKING_RESULTr   )r  r  r   r  r!  r"  r  r   r   runtime_triton_dtype_assertruntime_triton_shape_assertstatic_cpp_dtype_assertr   r   r    r  r  r  *graphsafe_rng_func_ignores_fallback_randomr  r   use_libtorchr  r  force_filter_reduction_configsdistort_benchmarking_resultbisect_pre_grad_graph'bisect_keep_custom_backend_for_inductorr   r   r   r  r  	  s+        27)4777$(NHSM((("'"'# 13
21 1    13
21 1    27.AEHW_%=>EEE L .2$d111 		@AASH # #,")3R# # ".3+++r   r  )*c                   f    e Zd ZU ej                            dd          dk    Zeed<   dZ	eed<   dS )eager_numerics'TORCHINDUCTOR_EMULATE_DIVISION_ROUNDINGr   r   division_roundingFdisable_ftzN)
r  r  r   r   r   r    r  r!  r"  r  r   r   r   r  r  	  sU         
 	
@#FF#M t    Kr   r  %TORCHINDUCTOR_EMULATE_PRECISION_CASTSemulate_precision_casts(Z  r   rE  collections.abcr   typingr   r   r   r   r   r	   r/   !torch._inductor.custom_graph_passtorch._environmentr
   torch.utils._config_moduler   r   r   r   torch._inductor.choicesr   r   r    inplace_paddingcan_inplace_pad_graph_inputr!  r   r!   r$   r'   r*   r4   r9   enable_auto_functionalized_v2r  disable_progressverbose_progressworker_log_pathr   r=   r"  rD   rE   rF   rG   rI   rJ   rK   rL   rN   rO   rP   r"  r  rQ   rT   rV   rX   rZ   r[   online_softmaxapply_gumbel_max_trickdcestatic_weight_shapessize_assertsnan_assertsruntime_triton_nan_assertsscalar_assertsalignment_assertspick_loop_ordersinplace_buffersallow_buffer_reusememory_planningr  rj   benchmark_harnessepilogue_fusionprologue_fusionepilogue_fusion_firstpattern_matcherb2b_gemm_passrk   r   custom_graph_passCustomGraphPassTyperl   rm   CustomPartitionerFnTypern   ro   rp   r8  graphGraphrq   rX  rr   split_cat_fx_passes efficient_conv_bn_eval_fx_passesis_predispatchgroup_fusionbatch_fusionrs   rt   reorder_for_localitydynamic_scale_rblockforce_fuse_int_mm_with_muluse_mixed_mmr{   r    reorder_for_compute_comm_overlapr   r   reorder_for_peak_memoryreorder_for_peak_memory_debugr   r   r   r   r   r   r   estimate_op_runtimer   intra_node_bwinter_node_bw
cpu_gpu_bwr   distributed_max_autotune_gemmpipeline_max_autotune_gemmmax_autotunemax_autotune_pointwisemax_autotune_gemmr   inductor_default_autotune_warmupinductor_default_autotune_repr   r   !max_autotune_report_choices_stats.max_autotune_prune_choices_based_on_shared_memtriton_disable_device_detectionr   r   r   r   r   r   uppermax_autotune_gemm_backendsr   r   max_autotune_conv_backendsr   r   r   r   r   r   r   r   r   autotune_fallback_to_atenunbacked_symint_fallbacksearch_autotune_cache	save_argsautotune_in_subproc+max_autotune_subproc_result_timeout_seconds-max_autotune_subproc_graceful_timeout_seconds.max_autotune_subproc_terminate_timeout_secondsautotune_multi_devicecollective_benchmark_nrunsr7  collective_benchmark_timeoutcoordinate_descent_tuning'coordinate_descent_check_all_directions coordinate_descent_search_radiusr   r   run_jit_post_compile_hookr   r   r   autoheuristic_log_pathr3   r  layout_opt_defaultlayout_optimizationforce_layout_optimizationkeep_output_stridewarn_mix_layoutrealize_reads_thresholdrealize_opcount_thresholdrealize_acc_reads_thresholdr   fallback_random"fallback_embedding_bag_byte_unpackimplicit_fallbacks assume_unaligned_fallback_outputr   aggressive_fusionr   r   enabled_metric_tablesr   r   score_fusion_memory_thresholdbenchmark_epilogue_fusion max_epilogue_benchmarked_choicesmax_fusion_size)max_fusion_buffer_group_pairwise_attemptsr   max_pointwise_cat_inputsforce_pointwise_catunroll_reductions_thresholdcomment_originconv_1x1_as_mmsplit_reductionsdeterministicmin_num_splitbenchmark_kernelconstant_and_index_propagationalways_keep_tensor_constantsassert_indirect_indexingcompute_all_boundscombo_kernelsbenchmark_combo_kernelcombo_kernels_autotunecombo_kernel_allow_mixed_sizes#combo_kernel_foreach_dynamic_shapescombo_kernel_max_num_args!combo_kernel_per_subkernel_blockscombo_kernels_pointwise_onlyjoint_graph_constant_foldingdebug_index_asserts__version__is_nightly_or_sourcedeveloper_warnings"optimize_scatter_upon_const_tensorr   r   r  r
  r  r  r  r  _fuse_ddp_communication_fuse_ddp_bucket_sizer  r  partitioned_scatter_enabledr  r  r  r  r$  r<  rM  rL  rO  rQ  rR  rS  rU  rW  rX  libfb.pyrZ  __package__get_dir_pathr  r  replaceseprY  
ValueErrorImportErrorkernel_name_max_opsshape_paddingcomprehensive_paddingpad_channels_lastpad_dynamic_shapesdisable_padding_cpu$expand_dimension_for_pointwise_nodespadding_alignment_bytespadding_stride_thresholdpad_outputsbw_outputs_user_visibler^  permute_fusionprofiler_mark_wrapper_callgenerate_intermediate_hooksdebug_ir_traceback_raise_error_for_testingr`  _profile_varprofile_bandwidthprofile_bandwidth_regexrc  /profile_bandwidth_with_do_bench_using_profilingdisable_cpp_codegenrf  rg  rh  ri  rj  rk  rl  rm  rn  enable_linear_binary_foldingrq  rr  rs  rv  rw  r~  r  r  r  r}   r%  r[  r^  r  r  r  r  r  r  r  r  r  r  r  r  Tensor*write_are_deterministic_algorithms_enabledr  r  torch.utils._config_typingr  r  modulesr  r   r   r   <module>r     sz   					 



 $ $ $ $ $ $ E E E E E E E E E E E E E E E E  ( ( ( ( ( ( ( ( ( (             8777777*..!@#FF#M# Cx~ C C C CHTN    Cx~ C C C CKx~ K K K K8D>    d    
 
 
 
 
 JNN7==D 
 	     8Ay{{L33 
 '*cJNNA6JJ' '    
 v?;1	      $) D ( ( ( )F(E(G(G x~ G G G .-// "8D>    )/MB) ) )     " d ! ! ! )F(E(G(G x~ G G G 1V0U0W0W x~ W W W $V*VWWW d W W W 05 & 4 4 4 57 !4S> 6 6 6 )-  , , ,  (1*    JNN#>DDKT K K K JNN=sCCsJ D    :>>"<cBBcI
D I I I
 *3_ d 4 4 4 >DDK JNN93??3F 
    z~~:C@@CGjnn899S@JNN=>>#E   >DDK JNN4YY[[6QcccRR
       *..!@#FF#M 
<==D HJz~~H HWCD   
    *)++     TX 5?<P W W WTX EO=Q X X X TX u8P W W W PT u8L S S SPT 9M T T T
 JN hx)=(>(DEF M M M 	 	;	<=:;	= 	 	 	 	 (	;	<=:;	= 	 	 	   $)     ( 68 c4S>12 7 7 7 79 $sDcN23 8 8 8   z~~&JCPPTWW  #   	+ + c3h    FQAB P P P $)  .  (	?@A>?A	
	*    )-  , , ,   %  /0 & / / / >D w9: C C CQU .3%*9M0N U U U4: 7=1 : : : 	 3HXseSj=Q4R    17 w}- 6 6 6QU .3%*9M0N U U U   */ !4 / / /   
 &,V?<& & & d    JNN@AASH  JNN;<<C 
 z~~:;;sB (NOOSVV  JNN#DEEL #&3BI5r::$ $   !$BI2C88! ! #     ,+--     JNNDcJJcQ " JNNQSVWW
 / JNNBCHHCO   JNN2yy{{4SCCPSTT
     *, T#Y + + + ,. tCy - - - -2 #T 1 1 1 $V47   d    !# DI " " "  Z^^.0A %'' x}     /T.S.U.U hsm U U U  Z^^. %''  DF:>>2ID D%'' (? @ 
 
 
 DF:>>2ID D%'' (? @ 
 
 
 " T ! ! ! " T ! ! !     ! T      #  # # #! d ! ! ! JNN.44; 4   
 "      JNN455<	 jnn%HIISP 
 /3 +03 -14 . 
'LMMQTT  !SJNN=tDD  
  %uJNN?FF    
 JNN<==D  JNNJKKsR ( $'3JNN<cBB$ $   
'LbQQ JNN#DjQQ  JNN<cBBcI 
BC BD B B B BK K K K K KGC GD G G G G *I  
 !& 1:SSs JNN68JKKsR  JNN+KSQQUXX  Z^^$FLLPSS  *..!@AASH
       !(3-   
  &+ "  JNNCDDK !
 EI ".?*?!@A H H H   Z^^$@AASHd H H H(HIISP $ P P P
'LbQQ JNN299;;4OCCC   D     (,  + + + !#  JNN<cBBcI 
 $%    -/ ) /3 hsm 2 2 2          29=sCCsJ  	788C? BJNN#@!DDEE:>>"BCHHCO  "&   %           "# &* #  %* !$   $     11OUe>O5O Y[[8$8  JNNEsKKsR #
 &* Xc] ) ) )(,  , , , v2     
C 
 
 
 
 6577 S 7 7 7 &. s - - - !'7:! ! !     F   T         I U8I+>+C%D E   
 ! D       JNN>DDK 
 +, "C + + + +. "C - - - ,0 !5 / / /: : : : : : : :
58 58 58 58 58 58 58 58p6T 6 6 6 6! ! ! ! !J *3!R:P:P:R:R# R R R $*6:=$ $ $ D    #)&# # # C    ">!=!?!? $ ? ? ? $*6
;$ $ $ D    39&IL3 3 3 )4    JNN>DDK T   
 '-f
>' ' ' t   
 3-   9;; 
 $$$$$$ 	@3w3[00bf==zJJ     4w3J??$           
<cBBcI JNN8#>>#E        (- $  (            >DDK # 
 $    !  +4)++o t 5 5 5z~~5r:: B&  , 3 3""  +-*.."D+ + (3-   
 JNNHIISP 0    8#>>#E$ E E E %* T ) ) )  %  $ $ $ (-  , , ,
 $ t # # # $ t # # # 8= . < < < ,1 "D 0 0 0 49 *D 8 8 8 JNN?EEL  *..)JCPPTWW 4 W W W 37 )4 6 6 6 46 tCc3h/0 5 5 5RZ^^,MuUUVV 3 V V V %  % % %	Xc] 	 	 	 	 %+F1% % % c          "@ @ @ @ @ @ @ @FH H H H H H H HV
o1 o1 o1 o1 o1 o1 o1 o1j% % % % % % % %( ( ( ( ( ( ( (D W4 4 4 4 47 4 4 4< W" " " " "' " " "F# F# F# F# F# F# F# F#T =BW89 A A A 7?g23 > > > "*WX ) ) ) "*WX ) ) )       8K K K K K K K K\" " " T#Y   * * * T#Y   6 UWhelELI4OPQ V V V BIH#NNRUU +
               .4 .4 .4 .4 .4 .4 .4 .4b  -,,,,       ( JNN:C@@CG    
  ck(+ , , , , ,s   A,{< <|
|