
    Wjl                      d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlmZ d dlmZ d dlmZmZmZmZmZ d dlZd dlmZ d dlZd dlZd dlZd dlmZmZ d dlmZ d d	lmZmZ d d
l m!Z! d dl"m#Z# d dl$m%Z% d dlm&Z&m'Z' d dl(m)Z)m*Z* d dl+m,Z, d dl-m.Z. d dl/m0Z0 d dl1m2Z2m3Z3 d dl4m5Z5m6Z6m7Z7m8Z8m9Z9m:Z:m;Z;m<Z< d dl=m>Z> d dl?m@Z@ d dlAmBZB d dlCmDZD d dlEmFZF ddlGmHZHmIZImJZJ ddlKmLZLmMZMmNZNmOZOmPZPmQZQmRZRmSZS ddlTmUZUmVZVmWZWmXZX ddlYmZZZ ddlIm[Z[m\Z\m]Z]m^Z^m_Z_m`Z`maZambZbmcZcmdZdmeZemfZfmgZg ddlhmiZimjZjmkZkmlZlmmZmmnZnmoZompZpmqZqmrZrmsZsmtZt dd lumvZv dd!lwmxZx dd"lymzZz dd#l{m|Z|m}Z}m~Z~mZmZmZmZmZmZmZmZmZmZ dd$lmZmZ er@d d%lmZmZmZmZ d d&lmZ d d'lmZ d d(lmZ d d)lmZ dd*lmZ dd+lmZ dd,lmZ eeeNf         Zd d-lmZ  ej        e          Zej                            ed.          Zej        j        Z ej                    Z eHj                    rd d/lmZ ndSd5ZdTd9ZdUd<ZdVdBZdWdFZdXdIZdYdKZdZdNZ G dO dPej        j                  Z G dQ dRe          ZdS )[    )annotationsN)defaultdict)contextmanager)AnyNoReturnOptionalTYPE_CHECKINGUnion)Expr)deviceTensor)get_decompositions)defakedynamo_timed)FakeScriptObject)is_opaque_type)get_layout_constraint_tag)
LazyStringtrace_structured)compute_required_storage_lengthmake_channels_last_strides_for)
FakeTensor)full_aoti_runtime_assert)BackwardState)magic_methodsmethod_to_operator)_get_placeholder_exprfree_unbacked_symbolshas_free_symbolsresolve_unbacked_bindingsRuntimeAssertShapeEnvSympyBooleanSymTypes)Node)_is_view_op)no_dispatch)
OrderedSet)int_oo   )configirmetrics)BackendFeatureDeviceOpOverridesFileBackedGraphModuleget_backend_featuresget_device_op_overridesget_wrapper_codegen_for_deviceinit_backend_registrationWorkspaceArg)CppWrapperCodegenErrorLoweringExceptionMissingOperatorWithDecompMissingOperatorWithoutDecomp)count_flops_fx)assign_origin_nodeConstantDonatedBufferFixedLayoutget_device_typeGraphPartitionSignatureInputBuffer	Pointwise	ReductionShapeAsConstantBuffer
StorageBox	TensorBoxTorchBindObject)constrain_to_fake_tensorsconstrain_to_fx_stridesFALLBACK_ALLOW_LISTfallback_handler%fallback_node_due_to_unsupported_type	loweringsmake_fallbackmaybe_layout_constraintsneeds_realized_inputsrequire_contiguoustag_to_layout_constraintunsupported_output_tensor)autotune_cache)AutotuneCacheBundler)SizeVarAllocator)convert_shape_to_inductorgather_origins get_cloned_parameter_buffer_nameget_donated_idxsget_sympy_Expr_dtypeGraphPartitionMapis_same_tensor#maybe_get_suppress_shape_guards_ctxnormalize_nameshould_assume_input_alignedshould_fallback_by_defaultSUPPORTED_MKLDNN_DEVICESValueWithLineMap)NullHandlerV)CallableIterableIteratorSequence)
ModuleType)_EffectType)GraphModule)Graph)PythonWrapperCodegen)Dep)BaseSchedulerNode)output_code_log
perf_hints)log_module_codeargsr   kwargsreturnNonec                     d S N )rt   ru   s     Z/home/longshao/multi-rider-rag/.venv/lib/python3.11/site-packages/torch/_inductor/graph.pyrs   rs      s        constant_buffer
sympy.ExprOptional[torch.dtype]c                   t          | t          j        t          j        t          j        j        j        f          s
J d            t          | t          j        j        j                  rt          j        S t          | t          j                  rt          |           S | j
        rt          j        S | j        rt          j        S d S )Nzgget_constant_buffer_dtype only supports input of sympy.Symbol, sympy.Expr or sympy.core.numbers.Integer)
isinstancesympySymbolr   corenumbersIntegertorchint64r[   
is_integeris_floatfloat32)r}   s    r{   may_get_constant_buffer_dtyper      s    %,
EJ4F4NO    	r  
 /5:#5#=>> {/5:.. 5#O444! {		! }tr|   opboolc                F    t          d t          D                       }| |v S )Nc              3  4   K   | ]}t          |          V  d S ry   )r   ).0ms     r{   	<genexpr>z"is_magic_method.<locals>.<genexpr>   s+      HHQ-a00HHHHHHr|   )r(   r   )r   	magic_opss     r{   is_magic_methodr      s(    HH-HHHHHI?r|   objrl   targetstr1Union[Tensor, torch._C.ScriptObject, GraphModule]c           	         |                     d          }| }t          |          D ]R\  }}t          ||          s-t          dd                    |d |                              t          ||          }S|S )N.z#Node referenced nonexistent target )split	enumeratehasattrRuntimeErrorjoingetattr)r   r   target_atomsattr_itriatoms         r{   getattr_recursiver      s     <<$$LH\** + +4x&& 	Rchh|BQB?O6P6PRR   8T**Or|   grm   dict[Node, tuple[int, ...]]c                J   i }|                      d          d         }d|j        vr|S t          |j        d         t          j        j                  s|j        d         }n|j        }t          |          D ]*\  }}||j        d         v r|j        d         |         ||<   +|S )Noutputr   r   user_visible_output_idxsoriginal_output_strides)
find_nodesmetar   rt   r   fxr%   r   )r   retoutput_nodeoutput_node_argsidxnodes         r{   get_user_visible_output_stridesr      s    ')C,,(,++A.K!)999
k&q)58=99 ,&+A.&+/00 I I	T+"#=>>>#()BCCHCIJr|   user_visible_outputsdict[Node, object]c                   i | }g |                                 }t          g |          }|r|                                }t          |j                  r|j        r{t          |j        d         t          j        j	                  rQ|j        d         }||vr@|
                    |d           |                    |           |                    |           ||S )zc
    Extend user_visible_output_strides to include view ops that lead to user-visible outputs.
    r   N)keysr(   popr&   r   rt   r   r   r   r%   
setdefaultaddappend)r   resultqueuevisitedcurrentbases         r{   "extend_user_visible_output_stridesr      s     ":$8!9FfkkmmE5""G
 #))++''		#		# 7<?EHM::		#
 <?D7""!!$---D!!!T"""  # Mr|   user_visible_output_stridesc                   t           j        sdS t          |          }t          t          j        t          j        t          j        g          }t          t          j        t          j	        t          j
        t          j        t          j        t          j        t          j        t          j        t          j        t          j        t          j        t          j        g          }d
d}t)          | j                  D ]	}t-          |j        t0          j        j        j                  rd|j        d<   7t-          |j        t0          j        j                  r7t?          |j                  t0          j         j!        j"        k    rd|j        d<    ||          }|s||v r
d|j        d<   |j        #                    dd	          r&|j$        D ]} ||          }	|	s|	|vr
d|j        d<   t           j%        s||v r
d|j        d<   dS )a  
    Nodes like convolution/convolution_backward want its input to be dense.
    If we pad their inputs, we result in extra calls to copy kernels!  On the other hand, padding usually helps reduction.

    The pass finds nodes that dislike padding. These are nodes that can be reached
    from a convolution/convolution_backward in the backward direction without
    going thru a reduction.
    Nr   torch.fx.Noderv   %Optional[torch._ops.OpOverloadPacket]c                    | j         dk    rEt          | j        t          j        j                  r!t          | j        d          r| j        j        nd S )Ncall_function_overloadpacket)r   r   r   r   _ops
OpOverloadr   r   )r   s    r{   _get_overload_packetz8mark_nodes_dislike_padding.<locals>._get_overload_packet  sX    
 w/))4;
(=>> * %677 * K''
 	
r|   Tdislike_paddingF)r   r   rv   r   )&r+   comprehensive_paddingr   r(   atenconvolutionconvolution_backward
_scaled_mmvar_meansummeanprodanyaminamaxminmaxargminargmaxscatter_reducereversednodesr   r   r   _higher_order_opstriton_kernel_wrapTritonKernelWrapperMutationr   r   r   r   _CTagneeds_exact_stridesgetall_input_nodespad_outputs)
r   r   extended_user_visible_nodesops_dislike_paddingops_like_paddingr   curr   priorprior_ops
             r{   mark_nodes_dislike_paddingr      s    ' "D## # %%O	
  "MHIIHIIHHKK	
 "

 

 

 

     /  /J#6R
 
 	 +/CH&' sz5:#899	)#*55x|/0 0 +/CH&'!!#&& 	$$$*.CH&'8<<)511 	9, 9 9//66 #33348EJ01! 	/c-H&H&H*.CH&'A /  /r|   r   r%   c                (   t          t          j        dd           t          t          j        j        dd           t	          | j        t          j                  rt          | j        j	                  dk    rt          | j        j	        d         d          rz| j        j	        d         j        D ]b}|j        d         t          j        j        j        j        t          j        j        j        j        t          j        j        j        j        fv r dS cdS )Nmkldnn_convolution_pointwiser   targetsTF)r   r   opsr   r   r   	functoolspartiallenrt   r   r   fnsr   defaultbinary_convolution_pointwise_)r   r   s     r{   is_mkldnn_convr   C  s     		8T**6EI$&>EEQt{I$566 R !!A%%DK$Q'33 & k&q)1 	 	Fz!}	 7?	 7>	 8?!  
 tt 5r|   c                      e Zd ZU ded<   	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd fd$Zdd%Zdd)Zdd+Zdd/Zdd2Z	dd7Z
ddd=Zdd?Zej        ddA            ZddCZeddE            ZddFZddJZddLZddMZddNZeddP            ZddSZddVZddXZddZZdd\Zdш fd_ZddbZ ddcddgZ!ddjZ"ddmZ#ddnZ$ddoZ%ddrZ&	 ddduZ'ddxZ(dۈ fd~Z)d܈ fdZ*edd            Z+ddZ,ddZ-ddZ.d fdZ/ddZ0edd            Z1edd            Z2ddZ3d fdZ4ddZ5ddZ6	 	 	 	 dddZ7ddZ8ddZ9ddZ:ddZ;ddZ<ddZ=dZ>ded<   ddZ?ddZ@ddZAddZBddZCddZD xZES )GraphLoweringlist[ir.IRNode]graph_outputsNFgmtorch.fx.GraphModuleexample_inputsOptional[Sequence[object]]	shape_envOptional[ShapeEnv]graph_idOptional[int]cpp_wrapperr   aot_mode
layout_optOptional[bool]extern_node_serializer4Optional[Callable[[list[ir.ExternKernelNode]], Any]]is_inferenceis_backwardis_const_graphconst_output_indexOptional[dict[str, int]]const_wrapper_codeOptional[str]const_kernel_codeconst_moduleOptional[GraphLowering]nameinputs_to_checkOptional[Sequence[int]]
fx_wrapperrv   rw   c                    t                                          |           || _        ||n|                     ||	          | _        d| _        |	| _        |
| _        || _        || _	        || _
        || _        || _        d| _        |t                      }d| _        nd| _        || _        |j                                        | _        t)          t*          j                             | _        t1          |          | _        g | _        i | _        i | _        d | _        t)                      | _        |r|j        nt)                      | _        |r|j         nt)                      | _         d| _!        tE          t(                    | _#        tE          t(                    | _$        i | _%        g | _&        g | _'        |r|ni | _(        |r!t)          |)                                          nt)                      | _*        |r|j+        ni | _+        |r|j,        ni | _,        |j-        .                    dt)                                | _/        |r|j0        ni | _0        i | _1        i | _2        i | _3        i | _4        t)                      | _5        t)                      | _6        t)                      | _7        t)                      | _8        t)                      | _9        t)                      | _:        d | _;        d | _<        ddl=m>} t          j@                    r|r|n|| _A        d | _B        i | _C        t)                      | _D        g | _E        i | _F        tE          t                    | _H        i | _I        t          jJ                    | _K        || _L        || _M        || _N        || _O        i | _P        || _Q        || _R        t          t                    | _U        d | _V        d | _W        d | _X        d | _Y        d | _Z        | j        r| [                                nt)                      | _\        t)          dg          | _]        t          |j_                  | _`        t          |j_        | j`                   d	| _b        d	| _c        g | _d        d | _e        i | _f        |g                                | _h        | jh        ,                                D ]\  }}|| j,        |<   | jh        0                                D ]\  }}|| j0        |<   | ji        j-        .                    d
i           | _j        ||jk        ni | _k        t                        t          jn        d           t                    | _o        i | _p        t)                      | _q        t)                      | _r        i | _s        t)                      | _t        t)                      | _u        t)                      | _v        t          jx                    | _y        d| _z        t                      | _|        i | _}        d S )N)r  r   FTcpumutated_named_buffers)extern_node_json_serializerzaten.convolution_backward  dynamo_flat_name_to_original_fqn)~super__init__r  decide_layout_optr  num_channels_last_convr  r  r  r  r  r  r  extra_tracebackr"   reuse_shape_env
_shape_envdeferred_runtime_assertscopyras_by_symbolr(   r   r   bound_unbacked_symbolsrV   sizevarsgraph_input_namesgraph_inputsgraph_inputs_originalpartition_mapszero_dim_cpu_tensor_listdevice_typesdevice_idxsdevice_typer   additional_buffer_depsadditional_star_depsbuffer_to_padded_sizebuffers
operationsr  r   folded_constants	constantsnamed_buffersr   r   r"  named_parameterstorchbind_constantsopaque_value_type_classesseen_subgraphsconstant_reprsremoved_operationsremoved_buffersremoved_inplace_buffersmutated_buffersnever_reuse_buffersinplaced_to_remove
device_opswrapper_code&torch._inductor.extern_node_serializerr#  r+   	is_fbcoder  current_nodelistsmutated_inputsmutated_input_idxsname_to_bufferlistname_to_users
name_to_optimecreation_timer  r  r  record_multi_kernel_choicemulti_kernel_to_choicer  r
  next_post_grad_graph_counterpost_grad_graph_id	schedulerautotuning_inputsautotuning_mappingautotuning_gridscurrent_devicefind_nodes_prefer_channels_lastnodes_prefer_channels_last_warned_fallbackr   graphr   r   	cache_key
cache_pathcache_linemapdisable_cudagraphs_reasondevice_node_mapping__copy__orig_gmmoduler%  allocated_constant_namer4   r   	lru_cacher1   effectful_opsunaligned_buffersno_fuse_buffer_namesbuffer_layout_constraintslow_precision_codegen_opsinvoke_quant_opsall_codegen_kernel_names	itertoolscountworkspace_idplaceholder_idxrZ   bw_donated_idxsdep_size_hint_cache)selfr  r  r  r
  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r#  kv	__class__s                         r{   r(  zGraphLowering.__init__[  s   . 	, % J'''FF 	
 '(#(&,"4!2(.$ 

I#(D  #'D # .3355 	 '1&>&@&@#(33,.VX=?"AE9C%)5GL%%:<< 	 )5FL$$*,, 	 !BMC
 C
# ALJ@W@W! <>"(*.0"4<" 	
 "J)..00111 	 '3:L"" 	 +7>L&&B 	 @Bw{{#Z\\@
 @
" .:AL))r 	
  	  ;=&68.03=<<0:8B$0:4>LL 3=<<-126VVVVVV !!-&<-"", 	# ,0+-
/9||-/46@KD@Q@Q35!Y[[	&$
 +6'68#  "&'?"@"@>B @DGK:> 7; 7;oWD00222:<< 	' !+,G+H I I+J28+T+T("28T-MNNN !  	 9=& GI -/[[]]L..00 	& 	&DAq$%Dq!!L1133 	) 	)DAq'(D!!$$040@0D0D.1
 1
- 5A4LL00RT 	$ 	"###$=I$7$=$=>R$S$S!;= 3=,,5?\\! EG&:D,,&1; :D% &O--  "/11 AC   r|   c                8    | j                                          d S ry   )r-  freeze_runtime_assertsr  s    r{   r  z$GraphLowering.freeze_runtime_asserts.  s    ..00000r|   extorch.Tensor=tuple[Sequence[Union[int, Expr]], Sequence[Union[int, Expr]]]c                `   | j         rBt          |                                          t          |                                          fS ddlm}  |dt          | j        j                             }| j        	                    ||          \  }}}d |D             }d |D             }||fS )z
        Support dynamic shapes and dynamic strides by assigning variables
        to each dimension.  We duck-shape tensors, so if two tensors
        have the same size they get assigned the same symbolic variable.
        r   )ConstantSource__inductor_unknown_tensor_c                ^    g | ]*}t          |t          j                  r|j        j        n|+S rz   r   r   SymIntr   exprr   r   s     r{   
<listcomp>z8GraphLowering.symbolic_sizes_strides.<locals>.<listcomp>R  s1    RRRAu|!<!<C!&++!RRRr|   c                ^    g | ]*}t          |t          j                  r|j        j        n|+S rz   r  r  s     r{   r  z8GraphLowering.symbolic_sizes_strides.<locals>.<listcomp>S  s1    VVV!:a#>#>EAFKKAVVVr|   )
r,  rW   sizestridetorch._dynamo.sourcer  r   r-  backed_var_to_val,create_symbolic_sizes_strides_storage_offset)	r  r  r  sourcer  r  _r_sizer_strides	            r{   symbolic_sizes_stridesz$GraphLowering.symbolic_sizes_strides1  s      	,RWWYY779R		: :   <;;;;; $^US1R-S-SUU F LL 	 SRTRRRVVvVVVxr|   )tuple[list[sympy.Expr], list[sympy.Expr]]c                    d |                                 D             }d |                                D             }||fS )z+
        Primarily used to weights
        c                6    g | ]}t          j        |          S rz   r   r   r  s     r{   r  z6GraphLowering.static_sizes_strides.<locals>.<listcomp>\  s"    444Qa  444r|   c                6    g | ]}t          j        |          S rz   r  r  s     r{   r  z6GraphLowering.static_sizes_strides.<locals>.<listcomp>]  s"    888q%-""888r|   )r  r  )r  r  r  r  s       r{   static_sizes_stridesz"GraphLowering.static_sizes_stridesV  sD     54"''))44488BIIKK888V|r|   r   OUnion[ir.TensorBox, ir.StorageBox, ir.Buffer, WorkspaceArg, ir.TorchBindObject]Sequence[Expr]c                "   t          |t          j                  r|j        }t          |t          j                  r|j        }t          |t          j                  r |j        | j        v r| j        |j                 S |                                S ry   )	r   r,   rF   datarE   ComputedBufferr  r=  get_size)r  r   s     r{   get_allocation_sizez!GraphLowering.get_allocation_size`  s     dBL)) 	9DdBM** 	9DtR.//	#	T777 -di88==??"r|   2Union[ir.Buffer, WorkspaceArg, ir.TorchBindObject]r   c                    |                                 }|                     |          }|j        }|j        }t	          |||          S ry   )
get_layoutr  r  offsetr   )r  r   layoutr  r  r  s         r{   get_allocation_storage_sizez)GraphLowering.get_allocation_storage_sizes  sF     ""''--.tVVDDDr|   r   .Union[torch._inductor.ir.IRNode, device, None]featurer.   c                    t          |t                    s
J |            ||                     t          |                    v S ry   )r   r.   r1   r?   )r  r   r  s      r{   has_featurezGraphLowering.has_feature|  sC    
 '>22;;G;;2$33OF4K4KLLLLr|   Tdepro   count_bytesintc                    ||f| j         vr_d}	 |                                s+|r|                                }n|                                }n# t          $ r Y nw xY w|| j         ||f<   | j         ||f         S )zc
        Get the size hint for a dependency with caching to avoid expensive recomputation.
        r   )r  has_unbacked_symbolsnumbytes_hint
numel_hintKeyError)r  r  r  ress       r{   get_dep_size_hintzGraphLowering.get_dep_size_hint  s     T%===C
//11 /" /!//11!nn..    	
 <?D$c;%78'k(:;;s   ?A 
AAtorch.devicec                6    | j         x}r|S t          d          )NzNo current device)re  r   r  r   s     r{   get_current_device_or_throwz)GraphLowering.get_current_device_or_throw  s&    ((6 	4M2333r|   Iterator[None]c              #  V   K   | j         }|| _         	 d V  || _         d S # || _         w xY wry   )re  )r  r   r   s      r{   set_current_devicez GraphLowering.set_current_device  sF      #$	(EEE"'D%D''''s    	(r   c                *    | j         rdS | j        rdS dS )N	inferencebackwardforward)r  r  r  s    r{   get_training_phasez GraphLowering.get_training_phase  s'     	; 	:yr|   rl   c                  t           j        sdS t           j        rdS d | j        j        D             }| j        j        D ]&}t          |          r|                    |           't          |          }|dk    rdS t          j	        j
        j        r>t          j	        j
                                        rt          d |D                       rdS t          t          | j        j                            d|z  k    rt                              d           dS t#          d |D                       rt                              d	           dS d d}d!d}d!d}|rt%          t&                    }|D ]P}	t)          |	          }
|
 ||	          rd}n ||	          rd}n ||	          rd}nd}||xx         |
z  cc<   Qt                              d           d}d}d}d}t+          |                                          }|d         |z  |d         |z  z   |d         |z  z   |d         |z  z   }||k    }|st                              d||           |S t#          t/          ||                    rt                              d           dS t#          t/          ||                    rt                              d           dS t          t/          ||                    rt                              d           dS dS )"zl
        Decide if we should enable layout optimization for this graph based on
        heuristics.
        FTc                V    g | ]&}|j         t          j        j        j        j        u $|'S rz   )r   r   r   r   r   r   )r   ns     r{   r  z3GraphLowering.decide_layout_opt.<locals>.<listcomp>  s5     
 
 
UY^5O5W)W)WA)W)W)Wr|   r   c              3  r   K   | ]2}d D ]-}|j         |         j        d         j        j        t          v V  .3dS )r   r*   valN)rt   r   r   typerb   r   r  r   s      r{   r   z2GraphLowering.decide_layout_opt.<locals>.<genexpr>  sg        !   s '.37OO      r|   i,  z*Skipped layout opt because only a few convc              3  j   K   | ].}d D ])}t          |j        |         j        d                   V  */dS r  )r   rt   r   r  s      r{   r   z2GraphLowering.decide_layout_opt.<locals>.<genexpr>  sc       
 

 
  QVC[-e455
 
 
 
 
 
 
r|   zeSee perf regression with dynamic shape. Follow up in https://github.com/pytorch/pytorch/issues/102670r  r   rv   r   c                    | j         d         j        d         }t          |t          j                  sJ | j         d         dk    o|                    d          dk    S )Nr*   r  r&  )rt   r   r   r   r   r  )r  meta_vals     r{   
is_groupedz3GraphLowering.decide_layout_opt.<locals>.is_grouped  sR    vay~e,Hh555556":>:hmmA&6&6&::r|   r   c                   | j         d         j        d                             d          dz  | j         d         j        d                             d          k    o.| j         d         j        d                             d          dk    S )Nr*   r  r      rt   r   r  r  s    r{   is_in_out_channelz:GraphLowering.decide_layout_opt.<locals>.is_in_out_channel  ss    q	u%**1--1QVAY^E5J5O5OPQ5R5RR 6F1IN5)..q11A5r|   c                    | j         d         j        d                             d          dk    o.| j         d         j        d                             d          dk    S )Nr*   r  r   @   r  r  s    r{   is_small_channelz9GraphLowering.decide_layout_opt.<locals>.is_small_channel  sR    q	u%**1--3 8F1IN5)..q11R7r|   Ngroupedsmallin_outr   zConv inputs meta not foundg|?5^?gtV?g333333?guV?zhSkipped layout opt in inference because weighted flops indicate slowdown, default: %d, channels last: %dzFSkip layout opt because found grouped convolution with >1 in_channels!zBSkip layout opt because some convolutions have smaller out_channelz>Skip layout opt because all convolution channels are too small)r  r   rv   r   )r  r   rv   r   )r+   layout_optimizationforce_layout_optimizationri  r   r   r   r   r   backendsr   enabledis_availableallrW  logdebugr   r   floatr:   r   valuesmap)r  r  
conv_nodesr  nconvr  r  r  flop_countsr   counted_flops	node_typeGROUPED_MULTIPLIERDEFAULT_MULTIPLIERIN_OUT_MULTIPLIERSMALL_MULTIPLIERtotal_flopsweighted_flopsdo_layout_opts                      r{   r)  zGraphLowering.decide_layout_opt  s    ) 	5+ 	4
 
x~
 
 

  	% 	%Aa   %!!!$$$JA::5 N!)		%2244		   #    		 4
 tBHN##$$e33IIBCCC5 
 

 
 
 
 
 	
 IIw   5	; 	; 	; 	;
	 	 	 		 	 	 	  +	!,7,>,>K" 8 8 .t 4 4 (:d## * )II%%d++ * 'II&&t,, * (II )II&&&-7&&&&		6777
 "'!& %$k002233K I&);;g&)99:h'*;;< i(+==>  +k9M  		~"  
 ! & s:z**++ 	IIX   5 s$j1122 	IIT   5 s#Z0011 	IIVWWW5tr|   c                ,    | j         | j          d| S |S )z2Prepend the given name with the graph name if any.Nr  r  r  r  s     r{   qualify_namezGraphLowering.qualify_nameO  s%    9 i(($(((r|   list[torch.Tensor]subgraph_nameSubgraphLoweringc                    t          | ||| j        | j        | j        | j        | j        | j        |                     |          
  
        S )a  
        Make a subgraph of the current graph with all inherited parts, except
        the graph module (`gm`) and `example_inputs`.  The subgraphs are lowered
        separately and lifted into a separate function in the parent output
        wrapper code.  The subgraph name is qualified by the parent graph's
        name. Note that the lifting of subgraph is supported for python wrapper
        only. For cpp wrapper, we inline the subgraphs in the parent wrapper.
        )
parentr  r  r  r  r  r  r  r  r  )r   r-  r  r  r  r  r  r  )r  r  r  r  s       r{   make_subgraphzGraphLowering.make_subgraphU  sW      )o(]#'#>*(""=11
 
 
 	
r|   OrderedSet[Node]c                l   d}t           j        j        j        j        g}t          t                               }t          | j        j	        j
                  D ]}|j        t           j        j        j        j        u r|                    |           ||}>|j        |v rHt          |          r|                    |           m|j        D ]}||v r|                    |            n| j        j	        j
        D ]9}|||k    r n.||v r)|j        D ]!}|j        |v r|                    |           ":|S )aC  
        The rule to decide if an node prefer channels last is simple.
        1. if it's input/output of a convolution
        2. if one of its user prefers channels last

        We have rule 1 because cudnn runs a faster convolution kernel for channels last inputs;
        Rule 2 is also important. It makes sure that indirect inputs to convolution also prefers
        channels last.

        Consider the scenario: conv -> batch-norm -> relu -> conv
        Without rule 2, batch-norm output may use a contiguous layout. That will cause 2 extra copies:
        1. the output of batch-norm should be channels last initially since its input is a conv's output.
           Forcing the batch-norm's output to be contiguous results in the first copy
        2. The second conv's input is initially contiguous. This layout is propagated from the batch-norm's output.
           We need convert it to channels last layout which results in the second copy.
        With rule 2, we makes sure all the tensors in the chain uses channels last layout. So both copies
        can be saved.
        N)r   r   r   bmmr   r(   r%   r   rq  ri  r   r   r   r   r   users)r  	last_convnodes_cannot_propagate
output_setr  users         r{   rf  z-GraphLowering.find_nodes_prefer_channels_lastp  sd   & 	"')."4"<!=%''
$++122 	 	Ax59>5===q!!!$ !Ix111a   q!!!  :%%NN1%%%E &$ "( 	) 	)A$iJG ) )D{&<<< NN4((((r|   c                    || j         vr7| j                             |           t                              d|           d S d S )NzUsing FallbackKernel: %s)rh  r   perf_hint_loginfor  s     r{   warn_fallbackzGraphLowering.warn_fallback  sL    t,,,!%%d+++94@@@@@ -,r|   c                    | j                             |j                   |j        | j                            |j                   t
          j        j        r$|| j        vrt
          j        j        | j        |<   d S d S d S ry   )	r8  r   r  indexr9  re   ri  rR  rn  r  s     r{   add_device_infozGraphLowering.add_device_info  s    fk***<#  ...7 	DF$2J$J$J/0w/CD$V,,,	D 	D$J$Jr|   ,torch._subclasses.fake_tensor.FakeTensorModec                    t           j        S ry   )re   	fake_moder  s    r{   r  zGraphLowering.fake_mode  s
    {r|   buffer_name<Optional[Union[ir.TensorBox, ir.Buffer, ir.TorchBindObject]]c           	     :   || j         v r| j         |         S || j        v r| j        |         S || j        v ret          j        j        |         }t          j        |t          j        |j        |j	        gt          j        
                    |          R            S d S Nr  r  )rV  r4  rA  re   ri  r,   ConstantBufferr>   r   dtyper  )r  r  r  s      r{   try_get_bufferzGraphLowering.try_get_buffer  s     $---&{33$+++$[11$.((7$[1D$ ~K./g.J.J4.P.P      tr|   symbolr~   c                     t          d          )Nz'Should not be called for the main graph)r   )r  r  s     r{   add_symbol_graph_inputz$GraphLowering.add_symbol_graph_input  s    DEEEr|   2Union[ir.TensorBox, ir.Buffer, ir.TorchBindObject]c                X    |                      |          }||S t          d|           )Nz$Failed to find buffer matching name )r  r   r  r  bufs      r{   
get_bufferzGraphLowering.get_buffer  s8     !!+..?JO+OOPPPr|   torch.dtypec                   || j         v r| j         |         j        S t          | j        d          rp|| j        j        v rb| j        j        |         }|| j        v r| j        |                                         S || j        v r| j        |                                         S || j        v r| j        |                                         S || j        v r| j        |                                         S t          j	        d|          }|r(|                     |
                    d                    S t          d|           )Nmutation_real_namez1(as_strided|reinterpret_tensor)\(([a-zA-Z0-9_]+),r*   could not find )rA  r  r   ra  r(  rV  	get_dtyper4  rematchgroupr  )r  r  mutated_bufr   s       r{   r*  zGraphLowering.get_dtype  sC   $.((>+.44 DN$899	Bt~@@@.;KHKd111*;7AACCCd///(5??AAA$---&{3==???$+++$[1;;===HI;WW 	.>>!''!**---666777r|   Union[int, Expr]c                F   || j         v r| j         |                                         S || j        v r7| j        |         }|                                sdS |                                S || j        v r| j        |                                         S t          d|           )Nr*   r)  )rA  numelrV  has_tensor_output	get_numelr4  r  r#  s      r{   r3  zGraphLowering.get_numel  s    $.((>+.44666$---%k2C((** q==??"$+++$[1;;===666777r|   rt   r   c                    t          d          5   t                      j        | cd d d            S # 1 swxY w Y   d S )NzGraphLowering.run)r   r'  run)r  rt   r  s     r{   r5  zGraphLowering.run  s    -.. 	& 	&577;%	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	&s   488r   ir.Operationc                   |j         J d|             t          |t          j                  sJ |                     dt          | j                             }| j                            |           || j        |<   ||_         |S )NzOperation registered twice: r   )	operation_namer   r,   	Operationr  r   r?  r   rY  )r  r   r  s      r{   register_operationz GraphLowering.register_operation  s     ((*M*M*M((("bl+++++  !<c$/&:&:!<!<==r""" " r|   set_namebuffer	ir.Bufferr<  c                  |                      dt          | j                             }| j                            |           || j        |<   |                                }|[t          |t          j                  r,|	                                r|t          j        d          k    s|                     |           |r||_        |S )Nr$  r!  )r  r   r>  r   rV  
get_devicer   r,   r  is_zero_elementsr   r   r  r  )r  r=  r<  r  r   s        r{   register_bufferzGraphLowering.register_buffer  s      !:s4<'8'8!:!:;;F###$*D!""$$ 62#455  ++--  el51111   ((( 	FKr|   operation_names	list[str]c                p    |                      dd                    |          z             }|| j        |<   |S )Nlist_r  )r  r   rS  )r  rC  r  s      r{   register_operation_listz%GraphLowering.register_operation_list   s7      388O+D+D!DEE*
4r|   node_output%Union[Iterable[ir.IRNode], ir.IRNode]c                .     d fd |           d S )NvaluerI  rv   rw   c                   t          | t          t          f          r| D ]} |           t          | t          j                  r7|                                 D ]$}j        |                             |            #d S d S ry   )r   rW  tupler,   rF   get_read_namesrX  r   )rK  x	read_nameregisterr  s      r{   rQ  z1GraphLowering.register_users_of.<locals>.register(  s    %$//      AHQKKKK%.. @!&!5!5!7!7 @ @I&y188????@ @@ @r|   )rK  rI  rv   rw   rz   )r  rH  rQ  s   ` @r{   register_users_ofzGraphLowering.register_users_of%  sF    	@ 	@ 	@ 	@ 	@ 	@ 	@ 	r|   c                    t          |t                    sJ | j                            |           || j        vrdS | j        |         D ]}|                                 dS )z
        When a buffer is mutated we need to make sure all the reads to
        the old version are realized before the mutation happens.
        N)r   r   rK  r   rX  realize)r  r  r  s      r{   mark_buffer_mutatedz!GraphLowering.mark_buffer_mutated2  st    
 $$$$$$  &&&t)))F&t, 	 	DLLNNNN	 	r|   c                    || j         v r	|| j        v sJ d|z               t          | j         |                   }|| j        j        v r| j        j        |         n| j        |         S )z
        In AOTI, module buffers may have been mutated during the tracing and compilation.
        Thus we need to read from previously stored original buffers, to make sure the
        generated model.so uses correct initial values.
        z$Can not find the original value for )rr  rA  rY   rq  r   )r  r  	orig_names      r{   get_original_value_of_constantz,GraphLowering.get_original_value_of_constant@  s     t3338N8N8N2T9 9O8NN 5T5QRV5WXX	 DK,,, KY''%	
r|   r  Union[Tensor]c                   t           j        j        s3| j                                        D ]\  }}t          ||          r|c S |dt          | j                   }|}|d                                         rd| }|                     |          }t          |          }|}d}|| j        v r| d| }|dz  }|| j        v || j        |<   |j
        d|j        dt          |                                          dt          |                                          dt          |          d	| j        |<   || j        |<   |S )Nconstantr   	constant_r  r*    rO  )r+   aot_inductoruse_runtime_constant_foldingrA  itemsr]   r   isdigitr  r_   r   r  rM  r  r  hashrG  rr  )r  r  r  constant_namerK  rW  prefixcnts           r{   allocate_non_dup_const_namez)GraphLowering.allocate_non_dup_const_nameP  s    "? 	)(,(<(<(>(> ) )$u!$.. )(((() <3c$.1133D	7?? 	&%t%%D  &&  %%dn$$$$s$$D1HC dn$$  $t{  tz  TYY[[!! &+DKKMM&:&: Dzz  	D!
 .7$T*r|   r   rF   c                    |                      ||          }t          j        t          j        |t          |j        |j        g|                     |          R                      S r  )	rf  rF   creater,   r  r>   r   r  r  )r  r  r  new_names       r{   add_tensor_constantz!GraphLowering.add_tensor_constanto  sw     33D$??"K.2.G.G.M.M    
 
 	
r|   device_overrideOptional[torch.device]c                D   | j                  j        |k    s|S t          j        j                                        5  |                      d|j         |j        pd | j                  	                    |                    }|| j         v sJ | d            t          fd| j        D                       r| j         |         | j        |<   t          fd| j        D                       r| j         |         | j        |<   |cddd           S # 1 swxY w Y   dS )z
        We AOT copy constants to the devices they are needed on.
        If device_override doesn't match the constant's device, then
        copy it and return a different name.
        Nr  r   z' should be in V.graph.constants alreadyc              3  >   K   | ]}t          |          k    V  d S ry   r_   )r   r  r  s     r{   r   z.GraphLowering.constant_name.<locals>.<genexpr>  sD         {333     r|   c              3  >   K   | ]}t          |          k    V  d S ry   ro  )r   
param_namer  s     r{   r   z.GraphLowering.constant_name.<locals>.<genexpr>  sD         z222     r|   )rA  r   r   utils_python_dispatch_disable_current_modesrf  r  r  tor   rB  rC  )r  r  rk  non_dup_const_names    `  r{   rc  zGraphLowering.constant_name|  s    >$&/99_=TK[)@@BB 	& 	& "&!A!ALL/.L0E0JLLt$''88" "
 &777%NNN 877     #'#5      :>&:"#56     "&"7      =AN&=%&89 &=	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	&s   C	DDDr   tuple[object]ru   dict[str, object]Union[Expr, TensorBox, None]c                j   | xj         dz  c_         t                                          |||          }|                     |          }t	          |t
                    rXt          j        j        st          |j
                  }n|j
        j        }|| j        |<   | j                            |           |S t	          |t          t           t"          f          r:t%          j        |          }|| j        |<   | j                            |           |S t	          |t(                    r7t+          ||          }|| j        |<   | j                            |           |S || j                            |           d S t	          |t,                    r| j                            |           d S t	          |t.          j                  rt3          t          j        j        j                  dk    rat9          t;          t          j        j        j                            j        t.          j        j         j!        t.          j"        j#        j$        fv sJ tK          j&        ||j'                  }|| j        |<   | j                            |           |S t	          |t.          j(                  s
J |            |j)        s| *                    |          \  }}	n| +                    |          \  }}	| j        rS| j,        rL| j         | j,        v r>t[          j.        t_          |ta          |j'        |j1        ||	                              }
n=t[          j.        te          |ta          |j'        |j1        ||	                              }
|
| j        |<   | j                            |           |
j3        j3        | j4        |<   | j        j        r| 5                    |j'                   tm                      5  to          |          s| j8        9                    |           d d d            n# 1 swxY w Y   |
S )Nr*   r  rK  )r  r   r  ):r~  r'  placeholderr  r   r$   re   ri  r  r   r   r  r4  r3  r   r  r   r  r   sympifyr   rG   r   r   	Generatorr   rR  r  r^  iterr   _prims	rng_primsgraphsafe_run_with_rng_stater   higher_orderinvoke_subgraphr,   GeneratorStater   r   _has_symbolic_sizes_stridesr  r  r  rF   rh  r=   r>   r  rA   r  r5  r  r^   r`   ru  r   )r  r   rt   ru   exampler  r   gensizesstridestensorr  s              r{   r|  zGraphLowering.placeholder  sP    	!''%%fdF;;""6**gx(( 	 7& ),W\::|((,Df%"))&111K#tU!344 	=))D(,Df%"))&111K!122 	!vW===C(+Df%"))&111J_"))&1114g}-- 	 "))&111411 
	qw+122a77DQW)/00= =&C	&6= = =  #GGGC(+Df%"))&111J'5<0099'990
 2 	B!66w??NE77!88AANE7 	$	 $(<<<%&w~w}eWUU   FF %&w~w}eWUU   F %+&!%%f----3[-="6*" 	1  000 122 	3 	3.w77 3&**6222	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 	3 s   2*P((P,/P,rf   dict[str, Any]c                   t           j        u rKt          |d         t          t          t
          f          r#t                                          ||          S t          t          j	        j
                  st          d          r |i |S t          vrzt          t          j	        j                  sJ  d                                                                d          d         }|t           v rt#          dd           nt$          j        rt)          g          rt*          nt,          }t.                              d|                    ||                     t5          d	          }|3t          j        j                                      r| j        rt>          }n t5          d	          }tA          |          }t#          |
           n2t)          g          rt+          ||          t-          ||          	 t.          !                    dt                              | j"        }	tG                    }
|
r||}}|
tH          u rd|	j%        v ry|	j%        d         \  }}t          t          j	        j                  sJ dfd} |||          \  }} |||          \  }} |||          \  }}tI          ||||          \  }}n |
|	g|R i |\  }}d|	j%        v r tM          d          |i |}nt                   |i |}|
r| '                    |	||||           |S # tP          $ r}d }t          | d          rH| j"        At          | j"        d          r,| j"        j%         | j"        j%        )                    dd           }tU          ||||          +                    |j,                  d d }~ww xY w)Nr   _inductor_lowering_functionz is not an OpOverloadr   FT)warnoverride_decompz"Creating implicit fallback for:
%s)with_default)layout_constraintz  via %seager_input_valsrt   r   ru   rv   tuple[Any, Any]c                x    t           j        j                            | |          }|J |d         |d         fS )Nr   r*   )r   r   operator_schemasnormalize_function)rt   ru   r   r   s      r{   	normalizez.GraphLowering.call_function.<locals>.normalizea  sE    %*X%>%Q%Q &f& &F $*#5#5#5#)!9fQi#77r|   should_fallbackadd_to_fallback_setrR  r   stack_tracer  )rt   r   ru   r   rv   r  )-operatorgetitemr   rW  rM  dictr'  r   r   r   OpOverloadPacketr   rM   r   r  r   rJ   rN   r+   implicit_fallbacksr   r8   r9   r  r  operator_strr   _libraryrr  
is_builtinr  rQ   rR   r  rR  rO   rH   r   rK   propagate_mutation	Exceptionr   r7   with_traceback__traceback__)r  r   rt   ru   	base_nameerrortagdecided_constraintdefault_tagr  layout_constraintsold_args
old_kwargs	fake_argsfake_kwargsr  outer  r  s    `                 r{   r   zGraphLowering.call_function  s   X%%%*T!WtUD>Q*R*R%77((v>>> &%*"=>> 	+71D
 D
 	+ 64*6***""fej&;<<  000 < ++C003I///f5$GGGGG* -I *6(336--5 
 9&&vtV<<  
 /H/ / / K,77??  (   + '& 1JT1 1 1K *B+)N)N&f8JKKKKK#VH-- I 0fEEE264HHH=	8IIj)F"3444!A!9&!A!A! J'+V*%)BBB
 *QV33128J1K.	;  *&%*2GHHHHH8 8 8 8 8 8 2;9k1R1R.	;'0yv'>'>f/8y:/N/N,*'@ &)[( (f $6#5a#I$#I#I#I&#I#ILD& AF**I&v5III#   '888! O ''8ZvNNNJ 	8 	8 	8Kn--N%1D-v66 2%*6"/488MM#64[  nQ_--48	8s   D,M 
OBOOtc                T    t          | j                  dk    o| j        d         dk    S )zM
        True if this is a small constant attr that will be inlined.
        r*   r      )r   shape)r  s    r{   can_inline_constantz!GraphLowering.can_inline_constant  s&    
 17||q 4QWQZ1_4r|   	tuple[()]OUnion[Constant, TensorBox, ShapeAsConstantBuffer, ir.Subgraph, TorchBindObject]c                   t          | j        |          }t          |t          j        j                  r8|| j        v r| j        |         S t          j        ||          }|| j        |<   |S t          |t          j	        j
                  r%|| j        |<   d| j        |<   t          ||          S t          |t                    r%|| j        |<   d| j        |<   t          ||          S t          t!          |                    r%|| j        |<   d| j        |<   t          ||          S t          |t          j                  sJ t$          j        j        s$t$          j        st-          |          s	|| j        v r|                     ||          S t3                      5  |j        dk    r:t7          |                                |j        |j                  cd d d            S |                     |          rdt@          !                    dtE          |                     ddl#m$}  ||%                                |j        |j        	          cd d d            S 	 d d d            n# 1 swxY w Y   |                     ||          S )
N)r  graph_moduler$  r{  rz   )rK  r  r   zInlining constant: %s r*   )r  )r  r   )&r   rq  r   r   r   rl   rF  r,   Subgraphr   ScriptObjectrD  rG  rG   r   r   r  r   r+   r^  r_  always_keep_tensor_constantsrS   r"  rj  r'   r  r<   itemr  r   r  r  r  r   loweringr  tolist)r  r   rt   ru   rK  r  r  s          r{   get_attrzGraphLowering.get_attr  s$    "$+v66eUX122 	,,,*622+6>>>C*-D'JeUX233 	=/4D$V,*,D'"e<<<</00 	=/4D$V,*,D'"e<<<<DKK(( 	=/4D$V,*,D'"e<<<<%.....<	;2	; )//	; 333++E6:::]] 
	V 
	V{b  **,,ek%,  
	V 
	V 
	V 
	V 
	V 
	V 
	V 
	V
 ''.. V		2CKK@@@,,,,,,vellnnEKUUU
	V 
	V 
	V 
	V 
	V 
	V 
	V 
	V
V
	V 
	V 
	V 
	V 
	V 
	V 
	V 
	V 
	V 
	V 
	V 
	V 
	V 
	V 
	V ''v666s   !9I-'A,I--I14I1r   c                    t           ry   AssertionErrorr  r   rt   ru   s       r{   call_modulezGraphLowering.call_module      r|   c                    t           ry   r  r  s       r{   call_methodzGraphLowering.call_method  r  r|   c                   t                                          |||          }t          |t          t          f          s|f}t          |t          t          f          sJ t          |                      t          d |D                       s
J |            t          j        j	        j
        d         }t          |t          t          f          s|f}d |D             }g }t          |          t          |          k    sJ t          ||          D ]\  }}t          |t          j        t          j        f          s|                    |           Bt          |                                t          j                  r3|                    t          j                            |                     t*          j        j                            |          sJ d |j        d                                         D             }	|                    t          j        ||	                     || _        | j                                        D ]t\  }
}t          |t<                    rt          |t          t>          j         t*          j        j        j!        f          sJ dt          |                       t          |t                    s|"                                 t          |t                    sJ |j#        }t          |t          j$                  sJ |}|j#        }t          |tJ                    r|&                                |
k    rnt          j'        (                    || j)        |
                    	 | j        *                    |          }| j)        |
         | j        |<   c# tV          $ r Y pw xY wv| ,                                 tZ          .                    d| j/        | j0        | j0        nd           d S )	Nc              3    K   | ]}t          |t          t          j        t	          d           t          j        t          j        t          j        j	        j
        t          t          j        t          j        t          f
          V  d S ry   )r   rF   r,   r<   r  r  r   r   logicboolalgBooleanr  EffectfulKernelrD   rG   r   rO  s     r{   r   z'GraphLowering.output.<locals>.<genexpr>  s       
 
   KJJ%JK'/&,# 
 
 
 
 
 
r|   r   c                L    g | ]!}t           j                            |          "S rz   )r,   ExternKernelrealize_inputr  s     r{   r  z(GraphLowering.output.<locals>.<listcomp>  s(    CCCq"///22CCCr|   c                ^    g | ]*}t          |t          j                  r|j        j        n|+S rz   r  r   ss     r{   r  z(GraphLowering.output.<locals>.<listcomp>  sC            $.a#>#>EAFKKA     r|   r  z'Unsupported inductor graph input type: zGForce channels last inputs for %d conv for the current graph with id %dr&  )1r'  r   r   rM  rW  r  r  re   ri  rR  rt   r   zipr,   rF   BaseViewr   get_output_specCommBufferLayoutr  
copy_inputr   	_inductoris_storage_and_layoutr   r  try_match_insignificant_stridesr  r4  r`  rG   r   r   r  rT  r  rE   rA   get_nameMutationLayoutSHOULDREMOVErealize_intor5  r  
ValueErrorfinalizer  r  r*  r
  )r  r   rt   ru   r   fx_node_argsresult_correct_stridesrfx_nodemeta_stridesr  rK  value_storage_boxindr  s                 r{   r   zGraphLowering.output  s    f55&5$-00 	YF&5$-00>>$v,,>>0 
 
  !
 
 
 
 
 	 	" #	 	 
& w+03,66 	+(?LCCFCCC!#<  CKK////fl33 	 	JAwa",!<== &--a0000A--//1DEE  '--bo.H.H.K.KLLLL )??BBBBB    %\%07799     
 '--6q,GG    4,2244 	 	KD%%11 	5:u/A/PQ  G GFeFFG G  eY// MMOOOeY/////JEeR]33333 %JEe[11 
U^^5E5E5M5M-::45d;  ,223DEEC.2.H.ND&s++!   D 6N 			U'!]6DMMB	
 	
 	
 	
 	
s   /N22
O ?O c                B    | j         D ]}|                                 d S ry   )r>  decide_layout)r  r$  s     r{   r  zGraphLowering.finalize3  s2    < 	  	 C	  	 r|   r   c              #  V   K   | j         }	 || _         d V  || _         d S # || _         w xY wry   )rR  )r  r   olds      r{   set_current_nodezGraphLowering.set_current_node7  sF      	$ $DEEE #DD####s    	(c              #  H   K   | j         }	 d V  || _         d S # || _         w xY wry   rO  )r  r  s     r{   set_current_wrapper_codez&GraphLowering.set_current_wrapper_code@  s>      	$EEE #DD####s    	!r  r  
tuple[Any]r  new_args
new_kwargsc                    t          |          t          |          k    sJ t          |          t          |          k    sJ |j        t          j        j        j        u r|j        d         }t          |t                    sJ t          j	        j
                            |d         |d         d |                                D             |d                   }|D ]U}|d         |         }	|d         |         }
|	|
u r#                     t          j        j        j        j        |	|
fi            VdS t          |j        t          j        j                  sJ d fd}|j        j        }t)          t+          ||                    D ]"\  }\  }	}
|j        |         } |||	|
           #d |j        D             }|D ]'}||         }	||         }
||         } |||	|
           (dS )ax  Propagate mutations on new_args/new_kwargs back to old_args/old_kwargs.

        Assumes we may have cloned old_args/old_kwargs into new_args/new_kwargs
        and then called fx_node(*new_args, **new_kwargs).

        If fx_node mutates any of new_args/new_kwargs, and they are different from
        old_args/old_kwargs, then we need to update the original tensor.
        ru   
kernel_idxconstant_args_idxc                r    i | ]4\  }}|t          |t          j        j                  r|j        d          n|5S r  r   r   r   r%   r   r   r  r  s      r{   
<dictcomp>z4GraphLowering.propagate_mutation.<locals>.<dictcomp>a  K       1 
1ehm(D(DKqve}}!  r|   tma_descriptor_metadataN
schema_argtorch._C.Argumentold_arg	ir.IRNodenew_argrv   rw   c                   ||u rd S | j         y| j         j        rot          |t          j                  r|f}|f}t          ||          D ]@\  }}||u r
                    t          j        j	        j
        j        ||fi            =d S d S d S ry   )
alias_infois_writer   r,   IRNoder  r   r   r   r   copy_r   )r  r  r  old_arg_itemnew_arg_itemr  s        r{   maybe_propagatez9GraphLowering.propagate_mutation.<locals>.maybe_propagater  s     '!!$0Z5J5S0 gry11 )&jG&jG25gw2G2G  .L,#|33 &&	,4|\6RTV    1000 r|   c                    i | ]
}|j         |S rz   r  )r   args     r{   r  z4GraphLowering.propagate_mutation.<locals>.<dictcomp>  s    CCC33CCCr|   )r  r  r  r  r  r  rv   rw   )r   r   r   r   r  triton_kernel_wrapper_mutationru   r   r  r   r   get_mutated_tensorsr`  r   r   r  r   r   r   _schemar   r  	arguments)r  r  r  r  r  r  ru   mutatedr  r  r  r  schemar   r  schema_kwargskeys   `                r{   r  z GraphLowering.propagate_mutationH  s8     8}}H----:#j//1111>UY3RRR^H-Ffd+++++-@TT<(./  &   45 G   Y Y$X.t4$X.t4g%%""59>#7#?'7ASUWXXXXF'.%**?@@@@@	 	 	 	 	 	( ''0Xx1H1H'I'I 	: 	:#C#'7)#.JOJ9999CC&2BCCC 	: 	:C oG oG&s+JOJ9999		: 	:r|   r  objectc                L   #$%&' d/fd}d0%& fd
}ddl m} t           j                  $t           j                  't          g          }j        dk    }|r+                               \  }}|t          ||          z  }t          j
                            |          5                                 5  t          j                  5  j        dk    rj        rt          j        t           j        j                  r{t           j        j                            j                  rRt-                    s|                    ddfd          r) |d            t1          j        d          |i |}	n7j        dk    rmt          j        t           j        j        t           j        j        f          r8t5                    r) |d            t1          j        d          |i |}	nj        dk    rj        t           j        j        j        u rt<          j        dk    r |d           t<          j        dk    r|}
|}j         !                    d          x}r&|d         }|d         }tE          ||||          \  }}ntG          g|R i |\  }} $                    j        ||          }	 %                    |
|||           ntM          dt<          j                   tO          j                  r |d           t          j         d         t           j(        t           j)        t           j*        f          rj         d         j+        j,        }	nNt[                      .                              }	n, |d           t[                      .                              }	t           j        j/        j0        j1        t           j        j/        j2        j1        t           j        j/        j3        j1        t           j        j/        j4        j1        t           j        j/        j5        j1        g#tm          d j7        D                       } j8        v &tm          #fdj7        D                       %j         !                    dd          rt          |	tr                    r|	:                                 j         d         ;                                }t!          j<        j        j=        | }|	>                                |k    r6|s4t          j?        |          }t          j@        A                    |	|          }	|rHt          |	tr                    r3t          |	jB        t          jC                  r|	:                                 |s%r't          j         d         t           jD                  r&r j8        !                              }nj         d         ;                                }|t          |          dk    rt<          jE        p& o% }t           jF        G                    j         d                   }t          t          |                    dk    }|sp|rnt          |	I                                          d!k    rI jJ        v r@&s>%s<t          jK        L                    |	I                                t           jM                  }|st          |          rωj         d         N                                st          |	jB        t          jC                  r5t          j@        A                    |	t          j?        |          |"          }	n\t          |	I                                          dk    rt          |          dk    rg }t          j@        O                    |	||"          }	t          t          j7                            }|dk    rct          |	tr                    rMj7        D ]Q}|j        t          v r|	Q                                 t           j        j/        jR        j1        t           j        j/        jS        j1        t           j        j/        jT        j1        g}g } jU        s.|V                    t           j        j/        jW        j1                   t           jX        jY        r|t           j        jZ        j[        j1        t           j        jZ        j[        j\        t           j        j/        j]        j1        t           j        j^        j_        j1        t           j        j^        j_        j`        t           j        j^        j_        j\        t           j        j^        j_        ja        gz  }|t           j        jZ        jb        j1        t           j        jZ        jb        j\        t           j        jZ        jc        j\        t           j        jZ        jd        j1        t           j        j^        je        j1        t           j        j^        jf        j\        gz  }t           jX        jg        r|t           j        jh        ji        j1        gz  }|j        |v rQt          j@        A                    |	t          j?        j         d         ;                                          d#"          }	|j        |v r^|jj        d         u rOt          j@        A                    |	t          j?        t          j         d         jl                                      }	|j        d$k    r:t          |	jB        jB        t          t          f          r|	:                                 S|	jB        }t          |t                    sht          |t          jC        t          jp        f          rB|jB        }t          |t                    s&t          |t          jC        t          jp        f          Bt          |t                    r3|q                    t          j7                            r ||	          }	|	r                    t          j7                             t          |	tr                    r4|	s                                r  ||	          }	|	Q                                 t          |	tr                    ret          |	jB        t                    rK|	jB        jB        }t          |t                    r*|t                    d%&          r|	:                                 d d d            n# 1 swxY w Y   d d d            n# 1 swxY w Y   d d d            n# 1 swxY w Y   t          |	            v                    |	           t          t          jx                             } j        $d          D ]}||y                                z  } j        'd          D ]}||y                                z  }t          jz        j{        j|        }j        d'k    rDt          |	t          jx                  r*|}                    |	          r|~                    |	           d1$' fd(} t          jz        j        rj        d'k    r|	S t          t          jz        j{        j|        j         !                    d)i                     }!|!J t          d* |!D                       }"||"k    s0J d+| d,|" d-                                 d. |                                               |           |	S )2z4Lower and execute a single FX node into Inductor IR.msgr   rv   rw   c                d    t                               dt          j                  |            d S )Nzlowering %s %s)r  r  r   format_node)r  r  s    r{   r  z%GraphLowering.run_node.<locals>.debug  s*    II&
1=(A(A3GGGGGr|   r   r  r  r   c           	        t           j                            |j        d                   }|j        d                                         }t          t          |                    dk    }|s|rt          |                                           dk    r\|j        v rSsQsOt          j
                            | t          j        t          |j        d         j                                      } | S )Nr  r      )r   _prims_common%is_non_overlapping_and_dense_or_falser   r  r   r   r  rg  r,   r  require_stride_orderget_stride_orderr   r  )r   r  denser  unbacked_symbols_in_stridesis_input_for_as_stridedis_user_visibler  s        r{   &maybe_apply_channels_last_stride_orderzFGraphLowering.run_node.<locals>.maybe_apply_channels_last_stride_order  s     'MMu E fUm**,,G*-.CG.L.L*M*MPQ*Q'/ ))**a//888' 9/ 9 =='6qve}7JKK   Mr|   r   )CompilerBisectorr   inductorrM   c                 "    t                     S ry   )reprr  s   r{   <lambda>z(GraphLowering.run_node.<locals>.<lambda>  s    a r|   rK   Fr  flexible_layout-user_defined_triton_kernel_layout_constraintsneeds_fixed_stride_orderr  r*   z1Unknown triton_kernel_default_layout_constraint: r   r  r$  c              3  ,   K   | ]}|j         d k    V  dS )r   Nr   )r   r  s     r{   r   z)GraphLowering.run_node.<locals>.<genexpr>!  s)      DDDDGx/DDDDDDr|   c              3  *   K   | ]}|j         v V  d S ry   )r   )r   r  as_strided_opss     r{   r   z)GraphLowering.run_node.<locals>.<genexpr>#  s;       * *26~-* * * * * *r|   inductor_realize_to_stridesNr!  )allow_paddingTr   d   )	thresholdr|  c                     d j         d          D             } |                     d j        d          D                        d                    |           S )Nc                D    g | ]}d |                                  d| dS )unbacked_symbol_defs= in:

get_unbacked_symbol_defs)r   r$  s     r{   r  zCGraphLowering.run_node.<locals>.format_new_defs.<locals>.<listcomp>  sH        V(D(D(F(FUUcUUU  r|   c              3  L   K   | ]}d |                                  d| dV   dS )r<  r=  r>  Nr?  )r   r   s     r{   r   zBGraphLowering.run_node.<locals>.format_new_defs.<locals>.<genexpr>  sV         T(C(C(E(ESSRSSS     r|   z***
)r>  extendr?  r   )r  buffer_watermarkoperation_watermarkr  s    r{   format_new_defsz/GraphLowering.run_node.<locals>.format_new_defs  s     <(8(9(9:  A HH  /*=*>*>?      <<??"r|   unbacked_bindingsc              3  j   K   | ].}t           j        j        j                            ||          V  /d S ry   )re   r  r  unbacked_renamingsr   r  s     r{   r   z)GraphLowering.run_node.<locals>.<genexpr>  sN       /
 /
 K!488A>>/
 /
 /
 /
 /
 /
r|   zfailed  >= z (inductor >= fx)
fx node is: z
new operations are:

)r  r   rv   rw   )r   r  r  r   rv   r  rv   r   )!torch._inductor.compiler_bisectorr+  r   r>  r?  r(   r   fetch_args_kwargs_from_envrX   r,   r  current_originsr  re   r   r   r   r   r   r  rr  r  rL   disable_subsystemrK   HigherOrderOperatorra   r   r  r  r+   'triton_kernel_default_layout_constraintr   r   rH   rI   r   r  r   r   r  SymFloatSymBoolr   r  r'  run_noder   
as_stridedr   as_strided_as_strided_scatterresize	resize_asr   r  r   rF   rT  r  r  any_is_symbolicmaybe_get_strider%  r  r$  r  r  r   r   r"  r#  r   r  rg  FlexibleLayout stride_ordered_for_memory_formatchannels_last_is_viewrequire_exact_stridesrP   realize_hintr   mm_int_mmr  r   r   r   _has_mkldnnr   _linear_pointwiser   mkldnn_rnn_layeronednnqlinear_pointwiser  binary_tensorr   r    _convolution_transpose_pointwiseqconv_pointwiseqconv2d_pointwisehas_mklmkl_mkl_linearrt   r   r  rB   rC   rE   
MutableBoxshould_realize_on_reuse
mark_reusehas_exceeded_max_readshas_large_inner_fnr;   rR  r   r   r@  ri  r2  r  is_unbacked_symintr   r  r    r  create_deferred_runtime_asserts))r  r  r  r*  r+  originsis_call_functionrt   ru   r   r  r  r  inp_args
inp_kwargs	is_outputr  sym_stridesstride_orderr7  r&  r'  	num_usersr  need_fixed_layoutneed_fixed_channels_last_layout_datacurrnew_unbacked_defsr$  r   r  rE  rF  renamed_unbacked_bindingsr5  rC  r(  r)  rD  r  s)   ``                                 @@@@@r{   rS  zGraphLowering.run_node  s   	H 	H 	H 	H 	H 	H	 	 	 	 	 	 	 	0 	GFFFFFt|,,!$/22 $.qc??4?2 	4::1==LD&~dF333GI%%g..K	) K	)!!!$$K	) K	) q!!K	) K	) ''H ( qx)>?? ( N(33AH==	 ( :!<< ( (99"K  ( ()))N)!(NNN 
 ''Huz4ej6TU  ( /q11	 ( ()))N)!(NNN 
 ''H	 6 UUUBFWWWEFFFB12 2  $H!'J+,6::6H+I+II' S#3A#6%5a%8
'@ "$&( (ff (?q'R4'R'R'R6'R'Rf!//$GGF++AxT6RRRR&|FLz||   !** - '(((F5MEL%.%-#P  1 VE]/4FF"WW--a00FFb			))!,, 	)1	*2	19	%-	(0N DDAGDDDDDI4#CCO&) * * * *:;'* * * ' '# vzz7?? XJ	E E X    &-..00#o3CWM**,,777#%#6w#?#?L_AA&,WWF!vy11! v{BK88!     14 1*uu|; ; 1 # 5">BB1EEGGfUm2244G&3w<<!+;+;*A/.A%655 " "/UUu E 1'::;;a? 0 8
!
   1 122a77!@@@ / A 7 A #%"3"T"T"OO--u/B# # 7 3w<<  6%=1133 z"KK8 8  &(_%I%I & " 3G < <.; &J & &FF  #6??#4#455::s7||a?O?O*,%'_%J%J &} &K & &F Jqw//00I1}}FI!>!>}G <- <-D{&;;;++--- "IN?G!IN-5!IN2:-)
 ;=7# Y-44UY^5O5WXXX 8/ Y- %	 0 B J %	 0 B I %	 ? G %	 0 B J %	 0 B I %	 0 B I %	 0 B P2 - < %	 0 G O %	 0 G N %	 0 H O %	 0 Q Y %	 0 @ H %	 0 B I@ ;  %x/ Y 1eim6O6W5X X 1;*;;;%'_%I%I & " 3AF5M4H4H4J4J K K.2 &J & &F !K+JJJ !TYq\ 1 1%'_%I%I & " 3$B16%=CV$W$W!" !"& &F w(**%fk&6I8NOO -"NN,,,$UJ77 'JBK7= = ' "JE %UJ77 'JBK7= = '
 eZ00 OU5R5RLL6 6 O DCFANNF !!#ag,,/// &),, &1N1N1P1P & @?JJ##%%% &),, )FK1T1T ){'dI.. )...== )(((WK	) K	) K	) K	) K	) K	) K	) K	) K	) K	) K	) K	) K	) K	) K	) K	) K	) K	) K	) K	) K	) K	) K	) K	) K	) K	) K	) K	) K	) K	) K	) K	) K	) K	) K	) K	) K	) K	) K	) K	) K	) K	) K	) K	) K	)Z 	61%%%v&&&&u|466< 0 1 12 	@ 	@C!=!=!?!??/"5"6"67 	? 	?B!<!<!>!>>G$.	
 DM!!65<00 ",,V44 " !!&)))		# 		# 		# 		# 		# 		# 		# 		# 7 	14=#8#8M. 6G&

3F(K(K
 
 !,,, %/ /
 /
&/
 /
 /
 %
 %
!
 !$====:' : :-F : :==??: :&5o&7&7: : >==
 	,,Q0ABBBs[   ,ut;qt$t;$t((t;+t(,t;/u;t?	?ut?	uuur  OrderedSet[sympy.Symbol]c                    d fd}t                      rl|j        t          j        j        j        j        u rJ j        rC                     |          \  }}|d         d	k    r ||d         |d          d
           d S d S  xj	        |z  c_	        t          j        j        j        }|D ]5} j                            |g           }|j        |         }	|                                                    |	          sbdd}
 |
|	j                  r |||	j        k    | d|	j                     |
|	j                  r |||	j        k    | d|	j                    |D ]~}t+          |j                  }| j	        z
  }|rEt/          |t0                    } j                            |g                               |           g ||j        |j                    7d S )Nr  r#   r  r   rv   rw   c                    t          j        | |          }                    |d                               |           d S )NTr;  )r,   AssertScalarrB  r:  )r  r  	assert_opr  s      r{   make_assertzBGraphLowering.create_deferred_runtime_asserts.<locals>.make_assertE  sG    c22I  T :::##I.....r|   r   Tz to be Truer  r   r   c                n    | t           t            fv rdS 	 t          |            dS # t          $ r Y dS w xY w)NFT)r)   r  	TypeError)r  s    r{   is_convertiblezEGraphLowering.create_deferred_runtime_asserts.<locals>.is_convertiblea  sP    & 111#(5)FFF#'4( ) ) )#(55)s   & 
44rI  z <= )r  )r  r#   r  r   rv   rw   )r  r   rv   r   )r   r   r   r   r   _assert_scalarr   r  rL  r1  re   ri  r2  r  r0  r   var_to_range _default_unspecified_value_rangeissubsetlowerupperr   r  r   r   r   r   )r  r  r  r  	node_argsr  r  i0rasvrr  rafvsmissingi1s   `              r{   ru  z-GraphLowering.create_deferred_runtime_asserts#  sS   D	/ 	/ 	/ 	/ 	/ 	/ %&&,	;EIN9AAA B  ::1==LIq|t##IaLYq\*F*F*FGGGGG $# ''+<<''(2I ( ; ;(,,R44+B/ AACCLLRPP K) ) ) ) &~bh// K#B"(Nr4I4Irx4I4IJJJ%~bh// K#B"(Nr4I4Irx4I4IJJJ ; ;B/88C!D$??G ; c222*55b"==DDRHHHH#BG\::::;); ;r|   c                    t           j        rt          d          t          j        dvrt          dt          j                   d S )NzC++ codegen is disabled)linuxdarwinwin32zUnsupported platform )r+   disable_cpp_codegenr6   sysplatformr  s    r{   !validate_can_generate_cpp_wrapperz/GraphLowering.validate_can_generate_cpp_wrapperx  sL    % 	D()BCCC<;;;()O)O)OPPP <;r|   is_subgraphparent_wrapper_codeOptional[PythonWrapperCodegen]partition_signatures!Optional[GraphPartitionSignature]c                   | j                                         }|                    d           |                    d           t          |          dk    s0J d                    d                    |                                t          |          dk    }|rdn|                                | _        | j        r| 	                                 t          | j                  | _        t          | j        | j        | j                  }|J d| j         d            |                    ||||          | _        | j        r| j        j        j        | j        _        d S d S )	Nr!  r   r*   zDoes not support mixing {}+r   zDevice z not supported)r8  r/  discardr   formatr   r   r:  r  r  r2   rN  r3   r  rh  rO  r  _names_iter)r  r  r  r  r  r8  only_cpuwrapper_code_gen_clss           r{   init_wrapper_codezGraphLowering.init_wrapper_code  s    (--//U###V$$$<  A%%%'C'J'JHH\""(
 (
%%% |$$)$,D55,2B2B2D2D 	5224441$2BCC=d. 
  
 $//6d&666 0// 177 	
 
  	W,0,=,J,VD)))	W 	Wr|   %list[Union[int, float, torch.Tensor]]c                   dd l }|                    | j                  }|                    |          }g }|j        j        D ]?}|j        dk    r2|j        t          j        j	        j
        u r|                    |           @g }i }i }g }	i }
|D ]}|j        d         D ]R}|D ]M}||v rt          |t          j        j                  r't!          |          ||<   |                    |           NS|j        d         }t          j        j                            |j        d         |j        d         d |                                D             |j        d                   }i }|j                            |          5  |                                D ]\  }}||v rO|j                            t          j        |f	          }t!          |	          ||<   |	                    |           X||
v r|
|         ||<   ht!          |	          |
|<   |	                    |           |
|         ||<   	 d d d            n# 1 swxY w Y   |||j        <   |	|z   }|j        j        D ]$}|j        d
k    rt3          |          f|_         n%|                                 t          j                            |          }|                    |          }t!          |          dk    r|t!          |	          d          }i | _        |D ]}d}g }|j        d         D ]}g }|D ]Z}t          |t          j        j                  s|                    |           7d}|                    |||                             [|                    t3          |                     |r|| j        |j        <   |d t!          |	                   | _        || _         d S )Nr   r   gridru   r  r  c                r    i | ]4\  }}|t          |t          j        j                  r|j        d          n|5S r  r  r   s      r{   r  z9GraphLowering.extract_autotune_inputs.<locals>.<dictcomp>  r  r|   r  )rt   r   FT)!r/  deepcopyrp  ri  r   r   r   r   r   r  r  r   ru   r   r   r%   r   r   r   r  r`  inserting_beforer   cloner  rM  rt   	recompileInterpreterr5  rd  rb  rc  )r  r  r/  	cloned_gmtriton_nodesr   grid_inputsvisited_gridstriton_inputskwargs_inputsvisited_kwargsr  r  ru   r  r  r  r  new_nodenew_outputsrunnerreturned_outputsgrid_outputsdynamic_grid	new_gridsnew_grids                             r{   extract_autotune_inputsz%GraphLowering.extract_autotune_inputs  s    	MM$,//	~66O) 	* 	*D?**K59#9#XXX##D))) ,.24(*-/)+  &	2 &	2DF+ 0 0 0 0Cm++ !#ux}55 0-0-=-=c*#**3///0 [*F-@TTL)/0  &   56 G *,J11$77 6 6"LLNN 6 6DAqG||#,?#@#@TUSW#@#X#X(+M(:(:
1%,,X666 N**(6q(9
1 (+M(:(:N1%!((+++$21$5JqMM66 6 6 6 6 6 6 6 6 6 6 6 6 6 6 (2M$)$$#k1O) 	 	Dw(""";//1	 # 	%%i00!::n55{a+C,>,>,@,@AL$&D!$ A A$.0	 K/ 	6 	6D!H# J J)#ux}== %$OOC000$'+ ]35G(HIIII$$U8__5555 A7@D)$)4!12FC4F4F2F!G"/s   >B0H;;H?	H?	)tuple[ValueWithLineMap, ValueWithLineMap]c                F    t           fddD                       rd
 fd}t          j        j        rpt          j        j        rKd} j        D ] }t          |t          j                  rd} n!|r |            } 	                    |            
                                S d _                                         j        } |            }t          j        j                                        5   ||           ddd           n# 1 swxY w Y   ~d _         j                                          j                                          j                                         t,          j        j        j                                         t,          j        j        j                                         t7          j                     t          j        d	di          5   
                                cddd           S # 1 swxY w Y   dS  
                                S )zQ
        For GPU, Triton kernels are autotuned and stored as cubin files
        c              3  *   K   | ]}|j         v V  d S ry   )r8  )r   r   r  s     r{   r   z9GraphLowering.codegen_with_cpp_wrapper.<locals>.<genexpr>		  s+      IIvv**IIIIIIr|   )cudaxpurv   r  c                    ddt           j        j                                        } | |t	          t
          j        t                    s]| j        r| j        	                                 d | j
        D             }fdt          j        |t
          j                  D             n>fdt	          t
          j        t                    rj        nt
          j        D             j        r_d	d
lm} fdt#          j                  D             }|D ]5}|         }t	          |t           j                  sJ  ||          |<   ~6S )NrO  1Union[torch.SymInt, torch.SymFloat, torch.Tensor]rv   Union[int, float, torch.Tensor]c                <   | d S t          | t          j        t          j        f          r| j        j        S t          | t                    rt          |           S t          | t          j                  s'J dt          t          |                     z               | S )Nz&Unknown type when creating real inputs)r   r   r  rQ  r   hintr   r   r   r   r  )rO  s    r{   materializezXGraphLowering.codegen_with_cpp_wrapper.<locals>.extract_real_inputs.<locals>.materialize	  s     y#t#Aen'EFF 	! v{*#Az22 !%ayy()!U\::  Ds4PQ77||S :  !r|   c                    g | ]}||S ry   rz   )r   params     r{   r  zWGraphLowering.codegen_with_cpp_wrapper.<locals>.extract_real_inputs.<locals>.<listcomp>$	  s(     # # #! , ,,,r|   c                &    g | ]} |          S rz   rz   r   rO  r  s     r{   r  zWGraphLowering.codegen_with_cpp_wrapper.<locals>.extract_real_inputs.<locals>.<listcomp>)	  s1     # # # $A# # #r|   c                &    g | ]} |          S rz   rz   r  s     r{   r  zWGraphLowering.codegen_with_cpp_wrapper.<locals>.extract_real_inputs.<locals>.<listcomp>1	  s1     # # # $A# # #r|   r*   )clone_preserve_stridesc                l    g | ]0\  }}|j         v t          |         t          j                  .|1S rz   )rT  r   r   r   )r   r   r  real_inputsr  s      r{   r  zWGraphLowering.codegen_with_cpp_wrapper.<locals>.extract_real_inputs.<locals>.<listcomp>=	  sN     * * *%C4#666&{3'7FF 7 666r|   )rO  r  rv   r  )r   _guardsTracingContexttry_getr   re   r  rd   output_stridesclearparams_flatr{  chainr  rT  
compile_fxr  r   r4  r   )	tracing_contextr  r  rU  r   mutated_inpr  r  r  s	         @@r{   extract_real_inputszCGraphLowering.codegen_with_cpp_wrapper.<locals>.extract_real_inputs	  s   ! ! ! !" #(-">"F"F"H"H".zM;8 8. '5 ?'6<<>>># #%4%@# # #K
# # # #!*am!L!L# # #KK# # # #  *!-EE/D//!"# # #K & (BBBBBB* * * * *)243D)E)E* * *&  2 ( ( '2#&6)+u|DDDDD+A+A++N+NC('K""r|   FTNztriton.autotune_at_compile_time)rv   r  )r   r+   tritonautotune_at_compile_timeautotune_with_sample_inputsr?  r   r,   UserDefinedTritonKernelr  codegenr  compile_to_modulecallr   rr  rs  rt  rI  r  rH  rM  re   ri  r2  precomputed_replacementsinv_precomputed_replacementsr-   resetpatch)r  r  user_defined_kernelsr   r  compileds   `     r{   codegen_with_cpp_wrapperz&GraphLowering.codegen_with_cpp_wrapper	  s    IIIIIIIII k	"D# D# D# D# D# D#L }5  * =< B+0("o " "%b"*DEE "370!E" , B&9&9&;&;44[AAA||~~% $) 113381133[1HHJJ * *H[)))* * * * * * * * * * * * * * * $( $**,,,'--///'--/// 9??AAA =CCEEE\#De"LMM * *<<>>* * * * * * * * * * * * * * * * * * <<>>!s$   2D

DD!HH	Hc                    ddl m} t          j        dd          5   || j                  | _         ddd           dS # 1 swxY w Y   dS )z
        (Re)initializes the scheduler member.  When initializing the scheduler, no CUBIN
        files should be generated (to avoid biasing any benchmarks and pessimizing
        fusion decisions).
        r*   )	Schedulerztriton.store_cubinFN)ra  r  r+   r  r?  )r  r  s     r{   _update_schedulerzGraphLowering._update_schedulerv	  s     	)(((((\.66 	8 	8&Yt77DN	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 	8 	8s   ?AAc                4   t          dd          5  |                                  |                                  t          j                            | j        | j        j                   | j	        
                    |            | j                                         t                              dt          j        j                   | j	                            | j                  }| j	                                         |cd d d            S # 1 swxY w Y   d S )NzGraphLowering.codegenTlog_pt2_compile_eventzFFinished codegen for all nodes. The list of kernel names available: %s)r   r  r  re   r  draw_orig_fx_graphrp  ra  r   rO  push_codegened_graphr  r  ri  rz  generater  pop_codegened_graph)r  r   s     r{   r  zGraphLowering.codegen	  s<   1NNN 	 	""$$$""$$$G&&t|T^5IJJJ224888N""$$$IIX0  
 &//0ABBF11333!	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	s   C.DDDparent_graphc                D   t          dd          5  |j        | _        |j        | _        |j        | _        |j        | _        |j        | _        |j        | _        |                                  | j        	                                 ddd           dS # 1 swxY w Y   dS )a  
        This is a more compact version of the `codegen()` above
        where we codegen this graph as a subgraph of some parent
        graph. The parent graph is passed as an argument: the
        intention is to inline codegening of the subgraph in
        the parent graph's wrapper code (including the generated
        kernels). The wrapper code is not finalized (via `.generate()`
        call), as this will be done in the parent graph's `codegen()`.
        zGraphLowering.codegen_subgraphTr  N)
r   rO  rN  r  r8  r9  r:  r  ra  r  )r  r  s     r{   codegen_subgraphzGraphLowering.codegen_subgraph	  s     :RVWWW 		% 		% , 9D*5DO+7D , 9D+7D+7D""$$$N""$$$		% 		% 		% 		% 		% 		% 		% 		% 		% 		% 		% 		% 		% 		% 		% 		% 		% 		%s   A6BBBVtuple[int, list[tuple[BaseSchedulerNode, int]], list[tuple[BaseSchedulerNode, float]]]c                    d}g }g }| j         j        D ]^}|                                }||z  }|                    ||dz  f           |                    ||                                f           _|||fS )Nr   r!  )ra  r   get_read_write_buffers_sizesr   get_estimated_runtime)r  total_bytesnode_countsnode_runtimesr   	num_bytess         r{   r  zGraphLowering.count_bytes	  s    
 N( 	G 	GD99;;I9$Ki1n5666  $(B(B(D(D!EFFFFK66r|   zOptional[Callable[[str], None]]save_output_codeCompiledModulec                    t          dddd          5  |                                 cd d d            S # 1 swxY w Y   d S )NzGraphLowering.compile_to_modulecode_genT,inductor_code_gen_cumulative_compile_time_us)
phase_namer  dynamo_compile_column_us)r   _compile_to_moduler  s    r{   r  zGraphLowering.compile_to_module	  s    -!"&%S	
 
 
 	- 	- **,,	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	- 	-s   599c                @   | j         r|                                 n|                                 \  }}t          |t                    r|                     |          }n7t          |t                    r|}nt          dt          |                     |j	        J t          |j	                   t                              d|j	                   t          j        d|j	                   t          j        r#t#          d|j	         t$          j                   t          |t                    rnt(          j                            |j	                   t(          j                            t.          j                            |j	                  d         dz              |S )Nz Unrecognized wrapper code type: Output code written to: %szCompiled module path: )filer   .debug)r  r  r  r   rc   _compile_to_module_linesr0   NotImplementedErrorr  __file__rs   r  r  rq   r  r+   benchmark_kernelprintr  stderrre   output_coder/  ospathsplitext)r  rO  r  mods       r{   r  z GraphLowering._compile_to_module	  st    04/?SD))+++T\\^^ 	a l$455 	//==CC&;<< 	CC%G43E3EGG   |'''%%%		.===93<HHH" 	L93<99
KKKKl$9:: 	GG---GLL))#,77:XEFFF
r|   rO  rc   c                  	 ddl m} t          j        j        rj| j        j                                        }|                    dd          }d|z   | j        j	                                        z   dz   }|j
        z   _
        t          j        t                              j
                   t          j        dj
                   t          j                    }t#          j        |j
                   	 d	 j        D             }|                    j
                  \  }	t          j        d
	           t*          j                            	           t*          j                            t0          j                            	          d         dz              t7          d	fdfd           n## t8          $ r t7          dfd            w xY wt;          dd          5  |                    |	|i | j        | j         | j!                  }d d d            n# 1 swxY w Y   || _"        	| _#        || _$        t          j%        r#t          j&        r|'                    dd           |S )Nr*   )PyCodeCachez"""z\"\"\"z&r"""
Compile-time auto-tuning block: 
z"""
zOutput code: 
%s)codec                &    g | ]\  }}||j         fS rz   r  )r   line_nor   s      r{   r  z:GraphLowering._compile_to_module_lines.<locals>.<listcomp>	  s4       !GT $*+  r|   r  r   r  inductor_output_codec                 H     t           j                                       dS )N)filename	file_path)r  r  abspath)r  s   r{   r/  z8GraphLowering._compile_to_module_lines.<locals>.<lambda>
  s#     $!#!6!6  r|   c                      j         S ry   rK  r  s   r{   r/  z8GraphLowering._compile_to_module_lines.<locals>.<lambda>
  
    <#5 r|   )
payload_fnc                      j         S ry   r,  r  s   r{   r/  z8GraphLowering._compile_to_module_lines.<locals>.<lambda>
  r-  r|   zPyCodeCache.load_by_key_pathTr  )linemapattrs)timesrepeat)(	codecacher"  r+   r  r  rO  kernel_autotune_defsgetvaluereplacekernel_autotune_callsrK  r  r
  rq   r  rT   inductor_meta_from_configrU   begin_compileline_mapwritere   r  r/  r  r  r  r   r  r   load_by_key_pathrA  rD  rE  rj  rk  rl  benchmark_harnessprofile_bandwidth_outputbenchmark_compiled_module)
r  rO  r"  r5  tuning_codeinductor_metar0  r  r   r  s
    `       @r{   r  z&GraphLowering._compile_to_module_lines	  s#    	+*****=1 	B#'#4#I#R#R#T#T #7#?#?{#S#S 7&' #9BBDDE 	  "-|/A!AL)5**<+=>>>1<3EFFF&@BB*=|?QRRRR	 %1%:  G $)),*<==IC!">EEEG%%%GLL))$//2X=>>> &    6555      	 	 	&5555   
 	  8PTUUU 
	 
	..n. 4	 / 	 	C
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 
	 $# 	=(G 	=))!)<<<
s   8B'F8 8 G,/H''H+.H+c                   g }t          j        d          }t          j        d          }|D ]}t          |t          j                  r-|                    | j         dt          |                      It          |t          j                  r-|                    | j         dt          |                      |                    |	                                           |S )Nr   _none_shape)
r{  r|  r   r,   NoneAsConstantBufferr   r  r^  rD   r  )r  r  namesshape_counternone_counterr   s         r{   _get_output_nameszGraphLowering._get_output_names,
  s    !** q))! 	. 	.D$ 788 .	DD\0B0BDDEEEED"":;; .	FFm1D1DFFGGGGT]]__----r|   c                6    |                      | j                  S ry   )rJ  r  r  s    r{   get_output_nameszGraphLowering.get_output_names9
  s    %%d&8999r|   c                   || j         v op| j         |                                         dk    oMt          | j         |                                                   dk    ot	          | j         |                   dk    p|| j        v S )Nr*   r   r!  )r4  r3  r   r  r?   r7  r  s     r{   is_unspec_argzGraphLowering.is_unspec_arg<
  s     D%% B!$'1133q8BD%d+4466771<B   1$ 788EA	3
 T22	3r|   )NNNFFNNFFFNNNNNNF)&r  r  r  r  r  r	  r
  r  r  r   r  r   r  r  r  r  r  r   r  r   r  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r   rv   rw   )rv   rw   )r  r  rv   r  )r  r  rv   r  )r   r  rv   r  )r   r  rv   r   )r   r  r  r.   rv   r   )T)r  ro   r  r   rv   r  )rv   r  )r   r  rv   r  rJ  )r  rl   r  r   rv   r   )r  r   rv   r   )r  r  r  r  r  r   rv   r   )rv   r  )r  r   rv   rw   )r   r  rv   rw   )rv   r  )r  r   rv   r  )r  r~   rv   rw   )r  r   rv   r!  )r  r   rv   r&  )r  r   rv   r/  )rt   r   rv   r   )r   r6  rv   r   )r=  r>  r<  r   rv   r   )rC  rD  rv   r   )rH  rI  rv   rw   )r  r   rv   r  )r  r  r  rY  rv   r   ry   )r  r   r  r  rv   rF   )r  r   rk  rl  rv   r   )r   r   rt   rw  ru   rx  rv   ry  )r   rf   rt   r   ru   r  rv   r   )r  r  rv   r   )r   r   rt   r  ru   rx  rv   r  )r   r   rt   r   ru   r   rv   r   )r   r   rt   rw  ru   rx  rv   rw   )r   r   )rv   r  )r  r   r  r  r  r  r  r  r  r  rv   rw   )r  r   rv   r  )r  r   r  r  rv   rw   FNNN
r  r   r  r  r  r  r  r  rv   rw   )r  r  rv   rw   )rv   r  )r  r  rv   rw   )rv   r  )rv   r  )rO  rc   rv   r  )r  r  rv   rD  )rv   rD  )r  r   rv   r   )F__name__
__module____qualname____annotations__r(  r  r  r  r  r  r  r  r  
contextlibr   r  r  staticmethodr)  r  r  rf  r  r  propertyr  r  r   r%  r*  r3  r5  r:  rB  rG  rR  rU  rX  rf  rj  rc  r|  r   r  r  r  r  r   r  r  r  r  rS  ru  r  r  r  r  r  r  r  r  r
  r  r  r  rJ  rL  rN  __classcell__r  s   @r{   r  r  X  sX        """"
 6:(,"&!%) "!$7;,0+/04"37 +QC QC QC QC QC QC QCf1 1 1 1#  #  #  # J   # # # #&E E E EM M M M< < < < <(4 4 4 4 ( ( ( (    ^ ^ ^ \^@   
 
 
 
6> > > >@A A A A
D D D D    X   $F F F FQ Q Q Q8 8 8 8,
8 
8 
8 
8& & & & & &    FK      (   
      
 
 
 
    @ 37
 
 
 
 
&& && && &&Rf f f f f fP~8 ~8 ~8 ~8 ~8 ~8@ 5 5 5 \567 67 67 67p      ]
 ]
 ]
 ]
 ]
 ]
~        $ $ $ ^$ $ $ $ ^$I: I: I: I:VN N N N N N`S; S; S; S;jQ Q Q Q "'+>BBF#W #W #W #W #WJ]0 ]0 ]0 ]0~q" q" q" q"f	8 	8 	8 	8   &% % % %*7 7 7 7" 9=<<<<- - - -   >F F F FP   : : : :3 3 3 3 3 3 3 3r|   r  c                  :     e Zd ZdZd fd	Z	 	 	 	 dd fdZ xZS )r   z
    Mostly a helper class for the subgraph lowering. The main goal is to call
    init_wrapper_code with the subgraph related arguments.
    r  r  rt   r   ru   rv   rw   c                H    || _          t                      j        |i | d S ry   )r  r'  r(  )r  r  rt   ru   r  s       r{   r(  zSubgraphLowering.__init__M
  s,    $)&)))))r|   FNr  r   r  r  r  r  r  r  c                n    t                                          d| j        | j        j                   d S )NT)r  r  r  )r'  r  r  r  rO  )r  r  r  r  r  r  s        r{   r  z"SubgraphLowering.init_wrapper_codeQ
  s@     	!!) $ 8 	" 	
 	
 	
 	
 	
r|   )r  r  rt   r   ru   r   rv   rw   rO  rP  )rQ  rR  rS  __doc__r(  r  rX  rY  s   @r{   r   r   G
  sx         
* * * * * * "'+>BBF
 
 
 
 
 
 
 
 
 
 
r|   r   )rt   r   ru   r   rv   rw   )r}   r~   rv   r   )r   r   rv   r   )r   rl   r   r   rv   r   )r   rm   rv   r   )r   r   rv   r   )r   rm   r   r   rv   rw   )r   r%   rv   r   )
__future__r   rU  r   r{  loggingr  r  r+  r  rZ  collectionsr   r   typingr   r   r   r	   r
   r   r   r   torch._loggingtorch.fxr   r   torch._decompr   torch._dynamo.utilsr   r   "torch._library.fake_class_registryr   torch._library.opaque_objectr   torch._library.utilsr   r   r   torch._prims_commonr   r   torch._subclasses.fake_tensorr   torch._utils_internalr   %torch.fx.experimental._backward_stater   torch.fx.experimental.sym_noder   r   %torch.fx.experimental.symbolic_shapesr   r   r   r    r!   r"   r#   r$   torch.fx.noder%   torch.fx.passes.reinplacer&   torch.utils._mode_utilsr'   torch.utils._ordered_setr(   torch.utils._sympy.numbersr)   r$  r+   r,   r-   codegen.commonr.   r/   r0   r1   r2   r3   r4   r5   excr6   r7   r8   r9   fx_utilsr:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   rE   rF   rG   r  rH   rI   rJ   rK   rL   rM   rN   rO   rP   rQ   rR   rS   runtimerT   runtime.autotune_cacherU   r2  rV   rr  rW   rX   rY   rZ   r[   r\   r]   r^   r_   r`   ra   rb   rc   virtualizedrd   re   collections.abcrf   rg   rh   ri   typesrj   torch._higher_order_ops.effectsrk   rl   torch.fx.graphrm   codegen.wrapperrn   dependenciesro   ra  rp   r  torch._inductor.codecacherq   	getLoggerrQ  r  _logginggetArtifactLoggerr  r   r   r|  r_  rQ  torch._inductor.fb.utilsrs   r   r   r   r   r   r   r   r   r  r  r   rz   r|   r{   <module>r     s   " " " " " "               				 				 



  # # # # # # % % % % % % @ @ @ @ @ @ @ @ @ @ @ @ @ @                              , , , , , , 4 4 4 4 4 4 4 4 ? ? ? ? ? ? 7 7 7 7 7 7 : : : : : : 7 7 7 7 7 7 7 7        5 4 4 4 4 4 : : : : : : ? ? ? ? ? ? L L L L L L L L	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	       1 1 1 1 1 1 / / / / / / / / / / / / - - - - - - ! ! ! ! ! ! ! ! ! !	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	            % $ $ $ $ $                                                         $ # # # # # 8 8 8 8 8 8 & & & & & &                              ( ' ' ' ' ' ' '  >FFFFFFFFFFFF      ;;;;;;$$$$$$$$$$$$555555!!!!!!,,,,,,:'<<=N 5 5 5 5 5 5 g!!00<HHy~*9?,, 6 8888888      (   
      $   0V/ V/ V/ V/r   *l#3 l#3 l#3 l#3 l#3EH( l#3 l#3 l#3^G
 
 
 
 
} 
 
 
 
 
r|   