
    Wjb3                   :   U d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlZ
d dlZd dlZd dlmZmZ d dlmZ d dlmZmZ d dlmZmZ d dlZd dlZd dlZd dlmZ d dlmc m Z! d dl"m#Z#m$Z$ d dl%m&Z& d d	l'm(Z( d d
l)m*Z+ d dl,m-Z-m.Z. d dl/m0Z0 d dl1m2Z2 d dl3m4Z4m5Z5 d dl6m7Z7 d dl8m9Z9 d dl:m;Z; d dl<m=Z=m>Z> d dl?m@Z@mAZA d dlBmCZCmDZDmEZEmFZFmGZGmHZH d dlImJZJ d dlKmLZL d dlMmNZN dd
lOm*Z* ddlPmQZQ ddlRmSZSmTZTmUZUmVZV ddlWmXZX ddlYmZZZm[Z[m\Z\ ddl]m^Z^ ddl_m`Z` ddlambZbmcZcmdZdmeZemfZf dd lgmhZhmiZimjZj erd dlkZld dlmZme*jn        Zod!epd"<    ejq        er          Zsd#epd$<   ejt        ju        Zuejt        jv        Zve G d% d&                      Zwe G d' d(                      Zxe G d) d*                      Zydd.Zzdd1Z{dd2Z|dd4Z} G d5 d6          Z~dd7Z e~            Zdd:Zdd;Zdd>Z	 	 dddJZddKZddLZddMZddNZddOZddPZddQZddRZddVZdd[Zdd^Z	 	 	 dddhZddnZddqZddsZddtZdduZddxZddyZddzZddZ	 dddZddddZdddddZ ed          ZddZddZddZej        dd            Zd dZddZddZddZddZddZddZddZddZ	 dddZddZddZd	dZd
dZddZddƄZd dlmZ ddʄZdd˄Z	 ddd̈́ZddτZdd҄ZddԄZ	 ddd֜dd؄Z	 	 	 	 	 dddZdS (      )annotationsN)defaultdictdeque)Callable)	dataclassreplace)AnyTYPE_CHECKING)countersis_node_meta_valid)(create_structured_trace_for_min_cut_info)is_with_effects)config)CustomKnapsackSolverCustomRuntimeEstimator)FakeScriptObject)
is_builtin)
LazyStringtrace_structured)	trace_log)extract_tensor_metadata)BackwardState)is_sym_nodepy_sym_types)magic_methodsmethod_to_operator)find_symbol_binding_fx_nodesfree_symbolsis_symbol_binding_fx_node	size_hintstatically_known_falsestatically_known_true)graph_drawer)
OrderedSet)CheckpointPolicy   )GraphInfoProvider)dp_knapsackdp_knapsack_sliding_hirschberggreedy_knapsackilp_knapsack)KnapsackEvaluator)	AOTOutputSavedForBackwardsAOTOutput#SavedForBackwardsNoVcCheckAOTOutput)_is_functional_graph)get_aot_graph_name)_is_bwd_seed_offset_is_fwd_seed_offset
_is_primal_is_tangentget_cuda_generator_meta_val)fx_graph_cseget_aten_targetraise_getitemsboolAOT_PARTITIONER_DEBUGzlogging.Loggerlogc                  n    e Zd ZU dZded<   ded<   ded<   ded<   ded<   ddZddZddZddZddZ	dS )OpTypesz8Class for keeping track of different operator categorieszOrderedSet[Callable[..., Any]]fusible_opscompute_intensive_ops
random_opsview_opsrecomputable_opsnodefx.Nodereturnr:   c                .    t          |          | j        v S N)r8   r?   selfrD   s     b/home/longshao/multi-rider-rag/.venv/lib/python3.11/site-packages/torch/_functorch/partitioners.py
is_fusiblezOpTypes.is_fusibleg   s    t$$(888    c                .    t          |          | j        v S rH   )r8   r@   rI   s     rK   is_compute_intensivezOpTypes.is_compute_intensivej   s    t$$(BBBrM   c                .    t          |          | j        v S rH   )r8   rA   rI   s     rK   	is_randomzOpTypes.is_randomm   s    t$$77rM   c                .    t          |          | j        v S rH   )r8   rB   rI   s     rK   is_viewzOpTypes.is_viewp   s    t$$55rM   c                .    t          |          | j        v S rH   )r8   rC   rI   s     rK   is_recomputablezOpTypes.is_recomputables   s    t$$(===rM   NrD   rE   rF   r:   )
__name__
__module____qualname____doc____annotations__rL   rO   rQ   rS   rU    rM   rK   r>   r>   ]   s         BB////9999....,,,,44449 9 9 9C C C C8 8 8 86 6 6 6> > > > > >rM   r>   c                      e Zd ZU ded<   ded<   ded<   ded<   ded<   ded	<   ded
<   ej        dd            ZddZddZddZ	ddZ
dS )NodeInfolist[fx.Node]inputsOrderedSet[fx.Node]_required_fw_nodesrequired_bw_nodestangents_closureunclaimed_nodesdict[fx.Node, int]fw_orderstatic_lifetime_input_nodesrF   c                J     t          d  j        D              fd          S )Nc              3     K   | ]}|V  d S rH   r\   .0ns     rK   	<genexpr>z-NodeInfo.required_fw_nodes.<locals>.<genexpr>   s"      001Q000000rM   c                    j         |          S rH   )rg   )rm   rJ   s    rK   <lambda>z,NodeInfo.required_fw_nodes.<locals>.<lambda>   s    a@P rM   key)sortedrb   rJ   s   `rK   required_fw_nodeszNodeInfo.required_fw_nodes   s:    00/0006P6P6P6P
 
 
 	
rM   rm   rE   r:   c                    || j         v S rH   )rb   rJ   rm   s     rK   is_required_fwzNodeInfo.is_required_fw   s    D+++rM   c                    || j         v S rH   )rc   rw   s     rK   is_required_bwzNodeInfo.is_required_bw   s    D***rM   c                    || j         v S rH   )re   rw   s     rK   is_unclaimedzNodeInfo.is_unclaimed   s    D(((rM   intc                T    || j         vrt          d| d          | j        |         S )NNode z not in fw nodes!)rb   AssertionErrorrg   rw   s     rK   get_fw_orderzNodeInfo.get_fw_order   s7    D+++ !=!=!=!=>>>}QrM   N)rF   r_   )rm   rE   rF   r:   )rm   rE   rF   r}   )rW   rX   rY   r[   	functoolscached_propertyru   rx   rz   r|   r   r\   rM   rK   r^   r^   w   s          ++++****))))((((    4444
 
 
 

, , , ,+ + + +) ) ) )           rM   r^   c                  B    e Zd ZU ded<   ded<   ded<   ded<   ded<   dS )MinCutOptionsr:   ban_if_used_far_apartban_if_long_fusible_chainsban_if_materialized_backwardban_if_not_in_allowlistban_if_reductionN)rW   rX   rY   r[   r\   rM   rK   r   r      sN         $$$$&&&&!!!!rM   r   rD   rE   rF   c                h    | j                             dd           t          j        t          j        fv S )N	recompute)metagetr%   MUST_RECOMPUTEPREFER_RECOMPUTErD   s    rK   must_recomputer      s0    9==d++')0  rM   fx_gfx.GraphModulec                H    | j         j        D ]}t          |          r dS dS )NTF)graphnodesr   r   rD   s     rK   has_recomputable_opsr      s7    
   $ 	44	5rM   c                    | j         j        D ]F}t          |          r5t          |j        d          r t
          j        j        |j        j        v r dS GdS )NtagsTF)	r   r   r   hasattrtargettorchTagnondeterministic_seededr   r   s     rK   has_recomputable_rng_opsr      s^    
   4  	V,,	 	1T[5EEE445rM   r}   c                
   t          | j        d         t          j        t          j        f          rdS t          | j        d         t          j                  s*t          dt          | j        d                              dS )Nvalr&   z.expected node.meta['val'] to be SymFloat, got    )
isinstancer   r   SymIntSymBoolSymFloatr   typer   s    rK   sym_node_sizer      sw    $)E"U\5=$ABB qdi&77 
UT$)EBR=S=SUU
 
 	
 1rM   c                      e Zd ZddZdS )InvalidNodeBaserF   strc                    dS )NzInvalid Noder\   rt   s    rK   __repr__zInvalidNodeBase.__repr__   s    ~rM   N)rF   r   )rW   rX   rY   r   r\   rM   rK   r   r      s(             rM   r   c                6    t          | j        dd           dk    S )N	namespace_c10d_functional)getattrr   r   s    rK   is_not_collectiver      s    4;T226HHHrM   getitem_nodefx.Node | Nonec                :   | j         t          j        k    rdS | j        d         }| j        d         }t	          |t
          j                  r|j        dk    rdS d|j        vrdS |j        d         }||vrdS ||         }t	          |t
          j                  r|S dS )zGiven a getitem node, check if it extracts from a higher-order op
    that has kwargs mapping the key back to an original input.

    Returns the original input node if found, None otherwise.
    Nr   r&   call_functionkwargs)	r   operatorgetitemargsr   fxNodeopr   )r   	ho_resultrr   r   original_inputs        rK   _get_ho_op_original_inputr      s     h...t!!$I

A
Ci)) Y\_-L-Lty'''th'F
&tC[N."'** 4rM   c                    | j         t          j        j        j        j        t          j        j        j        j        fvrdS | j        d         }t          |t          j
                  sdS t          |          S )zCheck if node is a view/reshape of a higher-order op output that aliases an input.

    Returns the original input node from the higher-order op's kwargs if the pattern
    matches, None otherwise.
    Nr   )r   r   opsatenviewdefaultreshaper   r   r   r   r   )rD   sources     rK   _is_copy_node_bw_onlyr      sa     {59>.6	8N8VWWWtYq\Ffbg&& t$V,,,rM   envdict[fx.Node, Any]c                    t          |           }|'||v r#t          ||         t                    s||         S t          |           }|'||v r#t          ||         t                    s||         S dS )a  Try to find a valid input replacement for an invalid forward output.

    This handles cases where a forward output depends on backward nodes but
    semantically aliases an input. For example, a view of a getitem from a
    triton kernel that mutates a buffer in backward, or a direct getitem from
    such a higher-order op. The original input may be a primal or a valid
    intermediate node already present in the forward graph.
    N)r   r   r   r   )rD   r   r   s      rK   _find_input_for_invalid_outputr      s     +400N"c!!3~.@@ " >"".t44N"c!!3~.@@ " >""4rM   Fjoint_graphfx.Graphr`   r_   outputsoutputs_descslist[AOTOutput]subgraph
str | Noneignore_must_be_in_fw_bwc                x   t          j                    }i |D ]-}|                    |j                  }|j        |_        ||<   .| j        D ]}|sHt          |          r|dk    r||vrt          |<   )t          |          r|dk    r||vrt          |<   M|v rR|j	        dk    rt          |<   h|j	        dk    r`t          j        |j        i |j        }	fd|	D             }	t          |	          rt          |<   |                    |fd          |<   |j	        dk    r|                    |fd          |<   |j	        d	k    r	 	g }
t!          ||          D ]{\  }}t#          |t           j                  rE|vrt'          d
| d          t#          |         t(                    rd}|j        t,          j        j        j        j        u rt          |          rt7          |j                  dk    rmt#          |j        d         t           j                  rH|j        d         v r9t#          |j        d                  t(                    s|j        d                  }|t9          |          }||
                    |           6t=          d
| d          |
                    |                    f|
                    |           }|                    tA          |
                    }||j        d<   |!                                 |"                                 |S )a  
    Given a graph, extracts out a subgraph that takes the specified nodes as
    inputs and returns the specified outputs.

    This includes specifying non-placeholder nodes as inputs.

    The general strategy is to initialize all inputs with proxies as we
    encounter them, and trace through the graph, only keeping values which take
    in valid proxies. Then, all dead code is eliminated.
    backwardforwardplaceholderr   c                z    g | ]7}t          |t          j                  t          |         t                    8S r\   )r   r   r   r   )rl   xr   s     rK   
<listcomp>z6_extract_graph_with_inputs_outputs.<locals>.<listcomp>P  sI       a))3q6?33  rM   c                    |          S rH   r\   r   r   s    rK   rp   z4_extract_graph_with_inputs_outputs.<locals>.<lambda>Y      CF rM   get_attrc                    |          S rH   r\   r   s    rK   rp   z4_extract_graph_with_inputs_outputs.<locals>.<lambda>\  r   rM   outputr   z couldn't be found in envNr&   r   z was invalid, but is outputdesc)#r   Graphr   namer   r   _must_be_in_backwardInvalidNode_must_be_in_forwardr   pytreearg_tree_leavesr   r   any	node_copyzipr   r   RuntimeErrorr   r   r   r   r   copy_r   lenr   appendr   r   tupleeliminate_dead_codelint)r   r`   r   r   r   r   	new_graphrD   new_nodeall_argsoutput_valuesr   x_descreplacementoutr   s                  @rK   "_extract_graph_with_inputs_outputsr     s   $ 

I"$C   ((33	D		! ) )& 	$T**
**&&'D	 $D))	))&&'D	3;; W%%#CIIW''-tyHDKHHH   !  H
 8}} 'D	!++D2B2B2B2BCCCIIW
""!++D2B2B2B2BCCCIIW  M-00 $ $	6a!! 	$||"#G1#G#G#GHHH#a&/22 M # H	 4 <<<,Q// =AFq(("16!9bg66 )q	S((&s16!9~GG ) #&afQi.K &"@C"H"HK*!((555$%KQ%K%K%KLLL  Q((((  ####


5//
0
0C$CHV!!###NNrM   c                    t           j        oWt          | j        t          j        j                  rt          | j                   p| j        t          j        j	        j
        k    S rH   )r   is_non_builtin_to_includer   r   r   _ops
OpOverloadr   r   higher_order triton_kernel_wrapper_functionalr   s    rK   r   r     sP    + 	DK!6	7	7	W
4;@W@W<W 	R;%)0QQrM   c                r    | j         dk    o,t          | j                            d          t                    S )Nr   r   )r   r   r   r   r   r   s    rK   _is_backward_stater    s,    7m#W
49==3G3G(W(WWrM   c                @    | j                             dd           dk    S )Npartitioner_tagis_backwardr   r   r   s    rK   _has_tag_is_backwardr
    s    9==*D11]BBrM   c                @    | j                             dd           dk    S )Nr  
is_forwardr	  r   s    rK   _has_tag_is_forwardr    s    9==*D11\AArM   c                @    | j                             dd           dk    S )Nr  must_be_in_forwardr	  r   s    rK   _has_tag_must_be_in_forwardr    s    9==*D115IIIrM   c                @    | j                             dd           dk    S )Nr  must_be_in_backwardr	  r   s    rK   _has_tag_must_be_in_backwardr    s    9==*D115JJJrM   c                    t          |           rdS t          | j        t          j        j                  o| j        j        j        }t          |            ot          |            o|S NT)
r  r   r   r   r   r  _schema
is_mutabler
  r  rD   r  s     rK   r   r     sr    "4(( t 	4;
 566 	+K* 
 !&&& 	,T222	rM   c                    t          |           rdS t          | j        t          j        j                  o| j        j        j        }t          |           o|S r  )	r  r   r   r   r   r  r  r  r
  r  s     rK   r   r     sU    #D)) t4;
 566 	+K*   %%4*4rM   joint_modulenum_fwd_outputsEtuple[list[fx.Node], list[fx.Node], list[AOTOutput], list[AOTOutput]]c          	        t          j        d | j                            d          D              }t          j        t	          t          | j                            d                              j                            dd gt          |          z                      }|d |         }||d          }|d |         }||d          }||||fS )Nc              3  $   K   | ]}|j         V  d S rH   r   rl   rD   s     rK   rn   z+_extract_fwd_bwd_outputs.<locals>.<genexpr>  s$      	K	K$)	K	K	K	K	K	KrM   r   r   r   )	r   r   r   
find_nodesnextiterr   r   r   )r  r  r   r   fwd_outputsbwd_outputsfwd_outputs_descsbwd_outputs_descss           rK   _extract_fwd_bwd_outputsr)    s     $	K	K 2 = = = J J	K	K	KG *T,$//8/<<==>>CGGTFS\\)	
 	
 M
 *?*+K/**+K%&6&67%o&6&67%68IIIrM   saved_valuesr   r   Nonec                V    | D ]%}|j         |k    r|                     |            d S &d S rH   )r   remove)r*  r   saved_values      rK   _remove_by_namer/    sI    #  t##,,,EE $ rM   fwd_module_outputs#list[fx.Node] | tuple[fx.Node, ...]c                    t          |           }t          t          |           dz
  dd          D ]}t          | |                   s|dz   } n|S )Nr&   )r   ranger   )r0  idxis      rK   find_first_sym_noder7    sk      
!
!C3)**Q.B77  -a011 	a%CE	 JrM         @-q=r   torch.fx.Graphtorch.fx.Nodemaxfloatminpositionc           	     
   |                      |          5  |                     t          j        j        j        j        |f          }t          j        j        j                            |j        d                   |j        d<   t          |j        d                   |j        d<   d d d            n# 1 swxY w Y   |                      |          5  |                     t          j        j        j	        j        |dgdf          }t          j        j        j	                            |j        d         dgd          |j        d<   t          |j        d                   |j        d<   d d d            n# 1 swxY w Y   |                      |          5  |                     t          j        j
        j        j        |t          j        f          }t          j        j
        j                            |j        d         t          j                  |j        d<   t          |j        d                   |j        d<   d d d            n# 1 swxY w Y   |                      |          5  |                     t          j        j        j        j        ||f          }t          j        j        j                            |j        d         |          |j        d<   t          |j        d                   |j        d<   d d d            n# 1 swxY w Y   |                      |          5  |                     t          j        j        j        j        |f          }	t          j        j        j                            |j        d                   |	j        d<   t          |	j        d                   |	j        d<   d d d            n# 1 swxY w Y   |                      |	          5  |                     t          j        j        j        j        |	|f          }
t          j        j        j                            |	j        d         |          |
j        d<   t          |
j        d                   |
j        d<   d d d            n# 1 swxY w Y   |                      |
          5  |                     t          j        j
        j        j        |
t          j        fd| d|j                   }t          j        j
        j                            |
j        d         t          j                  |j        d<   t          |j        d                   |j        d<   d d d            n# 1 swxY w Y   |S )	Nr  r   tensor_metar3  Tfp8_scale_pos__r   r   )inserting_afterr   r   r   r   absr   r   r   amaxprimsconvert_element_typefloat64	clamp_min
reciprocalmulTensorfloat32r   )r   rD   r<  r>  r?  abs_node	amax_nodeamax_64_nodeclamp_min_nodereciprocal_nodemul_node
scale_nodes               rK   calculate_quantization_scalingrW    s    
		t	$	$ U U&&IN& ' 
 
  %y~199$)E:JKKe'>x}U?S'T'Tm$U U U U U U U U U U U U U U U 
		x	(	( W W''IN'RD$' ( 
 
	 !&	 3 ; ;M% 2$!
 !
	u )@	u@U(V(V	}%W W W W W W W W W W W W W W W 
		y	)	) 

 

**IO08U]+ + 
 
 $)9?#G#O#ON5!5=$
 $
%  ,Ce$,
 ,
-(

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 
		|	,	, 

 

,,IN$,$ - 
 
 &+Y^%=%E%Ee$c&
 &
E" .E&.
 .
M*

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 
		~	.	. 

 

--IN%- " . 
 
 ',in&?&G&G&'
 '
U# /F '/
 /
]+

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 
			/	/ U U&&IN%!3' ' 
 
  %y~188 ' 
  
e (?x}U?S'T'Tm$U U U U U U U U U U U U U U U 
		x	(	( 	Y 	Y((IO08EM*8(88TY88 ) 
 


 "'!E!M!MM% %-"
 "

 *AQVAW)X)X
&	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y 	Y s   BB22B69B6BE44E8;E8B&II
I
&BLLL$BO  OO BQ>>RRB2UU #U rV  
quant_typetorch.dtyperK  	clamp_maxc           	        |                      |          5  |                     t          j        j        j        j        |t          j        f          }t          j        j        j                            |j        d         t          j                  |j        d<   t          |j        d                   |j        d<   d d d            n# 1 swxY w Y   |                      |          5  |                     t          j        j
        j        j        ||f          }t          j        j
        j                            |j        d         |j        d                   |j        d<   t          |j        d                   |j        d<   d d d            n# 1 swxY w Y   |                      |          5  |                     t          j        j
        j        j        ||f          }	t          j        j
        j                            |j        d         |          |	j        d<   t          |	j        d                   |	j        d<   d d d            n# 1 swxY w Y   |                      |	          5  |                     t          j        j
        j        j        |	|f          }
t          j        j
        j                            |	j        d         |          |
j        d<   t          |
j        d                   |
j        d<   d d d            n# 1 swxY w Y   |                      |
          5  |                     t          j        j        j        j        |
|fd| d|j                   }t          j        j        j                            |
j        d         |          |j        d<   t          |j        d                   |j        d<   d d d            n# 1 swxY w Y   |S )Nr  r   rA  fp8_quant_pos_rC  rD  )rE  r   r   r   rH  rI  r   rO  r   r   r   rM  rN  rK  rZ  r   )r   rD   rV  rX  rK  rZ  r?  target_node_32scaled_target_nodeclamp_min_scaled_nodeclamp_max_scaled_nodequant_activation_nodes               rK   perform_quantizationrb  -  s    
		z	*	* 

 

,,IO08& - 
 
 &+Y_%I%Q%QIeem&
 &
E" .E&.
 .
M*

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 
		~	.	. 

 

"00IN% *- 1 
 
 */);)B)B&
(>*
 *
& 2I#E*2
 2
.

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 
		1	2	2 

 

 % 3 3IN$,$i0 !4 !
 !
 -2IN,D,L,L#E*I-
 -
"5) 5L!&u-5
 5
"=1

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 
		4	5	5 

 

 % 3 3IN$,'3 !4 !
 !
 -2IN,D,L,L!&u-y-
 -
"5) 5L!&u-5
 5
"=1

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 
		4	5	5 
 
 % 3 3IO08'48(88TY88 !4 !
 !
 IO088%*51:  	"5)
 5L!&u-5
 5
"=1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 ! s^   B&CCC(BFFF1BIII/BLLL-BOOOtensortorch.Tensorc                b    |                                  }|                                 }||z  dz  S )z
    Calculate the size of a PyTorch tensor in megabytes (MB).

    Args:
        tensor (torch.Tensor): Input tensor

    Returns:
        float: Memory size in MB
    i   )numelelement_size)rc  num_elementsrg  s      rK   calculate_tensor_sizeri  s  s2     <<>>L&&((L<'K88rM   list[torch.dtype]c                     t           j        j        j        d                             dd          } d |                     d          D             } | S )N!activation_quantization_aten_passallowed_dtypesztorch.bfloat16c                j    g | ]0}t          t          |                    d           d                   1S ).r3  )r   r   split)rl   dtypes     rK   r   z&get_allowed_dtypes.<locals>.<listcomp>  s@       16u{{3''+,,  rM   ;)r   	_inductorr   post_grad_fusion_optionsr   rp  )rm  s    rK   get_allowed_dtypesru    s_    _+D+	c
,--  :H:N:Ns:S:S  N rM   c                <   t                      }t          |           r| j        d         j        |vrdS t          j        j        j        d                             dd          }t          | j        d                   }t          j        j        j        d                             dd          s||k    S t          j        j        j        d                             dd          r't          ||k              pt          ||k               S t          ||k              S )Nr   Frl  
size_in_mbd   skip_dynamo_guardsquantize_dynamic_shape)ru  r   r   rq  r   rs  r   rt  r   ri  r"   r!   )rD   rm  size_thresholdrw  s       rK   should_quantizer|    s   '))Nd## ty'7'=^'S'Su_+D+	c,  'ty'788J?!:+	c
&&G ^++ ?!:/

#&
.
.	G )n,  J+J.,HIIIJ
 )~)EFFFrM   c                     t           j        j        j        d                             dd          } t          t           |                     d          d                   S )Nrl  rX  ztorch.float8_e5m2ro  r3  )r   rs  r   rt  r   r   rp  )rX  s    rK   get_quant_typer~    sO    '@+	c,+,,  5***3//3444rM   rq  tuple[float, float]c                F    t          j        |           }|j        |j        fS )z
    Calculate the range of values for a given torch.dtype.
    Args:
        dtype (torch.dtype): The input dtype.
    Returns:
        tuple: A tuple containing the minimum and maximum values.
    )r   finfor>  r<  )rq  infos     rK   calculate_ranger    s"     ;uD8TXrM   c           
        |                      d          d         }|j        d         }t                      }t          |          \  }}t	                      g }g }t          |          D ]\  }}	|	j                            dd          rbt          j	        j
        j        d                             dd          rct          | |	|d	|          }
t          | |	|
||||          }t          |
          s|                    |
           n|                    |
           n|                     |	          5  |                     t          j        j        j        j        |	|fd
| d|	j                   }t          j        j        j                            |	j        d         |          |j        d<   t/          |j        d                   |j        d<   d d d            n# 1 swxY w Y   ||<   fdt          |          D             }t1          |          }||z   }|r|d |         |z   ||d          z   }|                    dt5          |                     t6          d         dxx         dz  cc<   d S )Nr   r!  r   saved_for_quantizationFrl  use_scalingTr9  r\  rC  rD  r   rA  c                B    g | ]\  }}                     ||          S r\   )r   )rl   r6  rD   position_to_quants      rK   r   z*quantize_activation_fw.<locals>.<listcomp>  s;       +21da&&  rM   inductor%activation_quantization_fwd_aten_passr&   )r"  r   r~  r  dict	enumerater   r   r   rs  r   rt  rW  rb  r   r   rE  r   r   rH  rI  r   r   r   r7  
update_argr   r   )r   r   r%  rX  rK  rZ  tensor_scale_nodessym_scale_nodesr?  rD   rV  
quant_nodeoutput_updated_argsr5  scale_nodesr  s                  @rK   quantize_activation_fwr    s   **1-F+a.K!!J*:66Iy(*%'O#K00 %5 %5$9==1599 #	5%>3c-&& <4E8 

 24ZIx 
 #:.. 7&--j9999#**:6666 **400  !&!4!4	<D"J/DhDDDD "5 " "J 	<DD Ie,j  OE*
 6M".6 6JOM2               +5h'   6?6L6L  
 1
2
2C$6K 
%36I#$$6OO 	 a233444Z@AAAQFAAAAAs   6BG  G$	'G$	c           
     
	  	 d | j         D             }d }|D ]O}|j                            dd          r0|j                            d           |j                            d          }t          j        j        j        d                             dd          r|                     |          5  d|j	        
                    dd	          z   	t          	fd
|D                       }d d d            n# 1 swxY w Y   |                     |          5  |                     t          j        j        j        j        ||f          }t          j        j        j                            |j        d         |          |j        d<   t#          |j        d                   |j        d<   d d d            n# 1 swxY w Y   |                     |          5  |                     t          j        j        j        j        ||f          }t          j        j        j                            |j        d         |j        d                   |j        d<   t#          |j        d                   |j        d<   d d d            n# 1 swxY w Y   |                     |          5  |                     t          j        j        j        j        ||f          }t          j        j        j                            |j        d         |          |j        d<   t#          |j        d                   |j        d<   d d d            n# 1 swxY w Y   n|                     |          5  |                     t          j        j        j        j        ||fdt+          |j	                  z             }t          j        j        j                            |j        d         |          |j        d<   t#          |j        d                   |j        d<   d d d            n# 1 swxY w Y   t-          |j                                                  D ]$}||k    r||k    r|                    ||           %Qt4          d         dxx         dz  cc<   d S )Nc                (    g | ]}|j         d k    |S )r   r!  r   s     rK   r   z*quantize_activation_bw.<locals>.<listcomp>  s$    JJJ$M1I1I1I1I1IrM   r  Fdequant_typerl  r  
fp8_scale_
fp8_quant_ c              3  2   K   | ]}|j         k    |V  d S rH   r   )rl   	bwd_input
scale_names     rK   rn   z)quantize_activation_bw.<locals>.<genexpr>  s<       & &%$>Z77 "7777& &rM   r  r   rA  dequant_rD  r  %activation_quantization_bwd_aten_passr&   )r   r   r   popr   rs  r   rt  rE  r   r   r#  r   r   rH  rI  r   r   r   divrN  r   listuserskeysreplace_input_withr   )
r   	bw_inputsactivation_noderD   r  rV  divided_target_node_32dequant_nodeuserr  s
            @rK   quantize_activation_bwr    s   JJ%+JJJIO H@ H@9==1599 G	@IMM23339==88L%>3c-''? **400  !-	0A0A,PR0S0S!SJ!% & & & &)2& & & " "J               **:66  &+&9&9	<D"L1 ': ' 'O
 	<DD Ie,l  $(/
 ;R',U3; ;O(7               **?;; 
 
-2-@-@	*1-z: .A . .* :?9K9R9R',U3Z_U5K: :*/6 00F0KE0RSS +/>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 **+ABB  #(#6#6	<D4lC $7 $ $L
 	<DD27>  !%e,
 8O$)%08 8L%m4               **400  #(#6#6	<D"L1'#di..8 $7 $ $L 	<DD Ie,l  !%e,
 8O$)%08 8L%m4               TZ__..// @ @<''DO,C,C++D,???Z@AAAQFAAAAAs^   0:C66C:	=C:	BF44F8	;F8	BI==J	J	BL;;L?	L?	B(PP	P	
fwd_module
bwd_modulebwd_module_inputsdict[str, fx.Node]c                    t          dd  fd           t           j                   t          dd  fd           t          dd fd            j                            d	
          d         j        d         }|D ]}d|j        v r|t          j        dd|j                           }j                            |          5  j        	                    |j                  }d d d            n# 1 swxY w Y   |j
        d         }|j
                            |j
                   d|j
        d<   ||j
        d<   |                    |           j                            |           t          j        j        j        d                             dd          rt'          j                            d
                    }|d         }	t)          |          D ]}
t+          |
          s|
}	 n j                            d	
          d         j        d         }|D ]~}d|j        v rsj                            |	          5  j        	                    |j                  }d d d            n# 1 swxY w Y   |j
                            |j
                   |}	t-          j                   t          dd fd           d S )Nartifactc                     dddS )N,before_activation_quantization_fwd_aten_passstringr   encodingr\   r\   rM   rK   rp   z5perform_fp8_activation_quantization.<locals>.<lambda>T      B 
 
 rM   c                 4                          ddd          S NFTprint_outputinclude_strideinclude_deviceprint_readabler  s   rK   rp   z5perform_fp8_activation_quantization.<locals>.<lambda>X  #    :44tD 5 
 
 rM   metadata_fn
payload_fnc                     dddS )N+after_activation_quantization_fwd_aten_passr  r  r\   r\   rM   rK   rp   z5perform_fp8_activation_quantization.<locals>.<lambda>a      A 
 
 rM   c                 4                          ddd          S r  r  r  s   rK   rp   z5perform_fp8_activation_quantization.<locals>.<lambda>e  r  rM   c                     dddS )N,before_activation_quantization_bwd_aten_passr  r  r\   r\   rM   rK   rp   z5perform_fp8_activation_quantization.<locals>.<lambda>l  r  rM   c                 4                          ddd          S r  r  r  s   rK   rp   z5perform_fp8_activation_quantization.<locals>.<lambda>p  r  rM   r   r!  r   r  z^fp8_quant_pos_\d+_r  r  r  Tr  rl  r  r   r3  r  c                     dddS )N+after_activation_quantization_bwd_aten_passr  r  r\   r\   rM   rK   rp   z5perform_fp8_activation_quantization.<locals>.<lambda>  r  rM   c                 4                          ddd          S r  r  r  s   rK   rp   z5perform_fp8_activation_quantization.<locals>.<lambda>  r  rM   )r   r  r   r"  r   r   resubrE  r   r   updatereplace_all_uses_with
erase_noder   rs  r   rt  r   r  reversedr5   r  )r  r  r  quant_fwd_module_outputsfwd_noder  quant_bwd_inputr  quant_bwd_module_inputsbwd_input_locbw_inputscaled_fwd_module_outputsscale_bwd_inputs   ``           rK   #perform_fp8_activation_quantizationr  M  s	   
 
 

 
 
 
	 	 	 	 :+,,,
 

 
 
 
	 	 	 	 
 

 
 
 
	 	 	 	  */::h:GGJOPQR, 3 38=(()-r8=AAI !11)<< S S","2">">HM">"R"RS S S S S S S S S S S S S S S$>.9L ''666=AO !9:3?O 0++O<<<''	2226+	c-0 #'z'7'B'Bm'B'T'T"U"U/3 !899 	 	Hx((  ( %/$4$?$?8$?$L$LQ$O$TUV$W!1 	0 	0Hx},,%55mDD W W&0&6&B&B&B&V&VOW W W W W W W W W W W W W W W$++HM::: /:+,,,
 

 
 
 
	 	 	 	 	 	s$   !C::C>	C>	!I;;I?	I?	rh   OrderedSet[fx.Node] | Nonec                   t           j                            dd           	 d S |rd |D             ng }d | D             }t          j        j        j        d                             dd          rd | D             }|j                            d          d	         j        d	         }d
 |j                            d          D             }d}|D ]}	|	j	        |v rt          |	          r|	j	        |v r!t                              d|	j	                   Dd|	j        d<   |	j        d         j        |	j        d<   d||	j	                 j        d<   |	j        d         j        ||	j	                 j        d<   d}|rt          |||           d S d S )Nrl  c                    g | ]	}|j         
S r\   r  r   s     rK   r   z2enable_activation_quantization.<locals>.<listcomp>  s    ;;;t;;;rM   c                    i | ]
}|j         |S r\   r  r   s     rK   
<dictcomp>z2enable_activation_quantization.<locals>.<dictcomp>  s    CCCd$)TCCCrM   exclude_primalsFc                0    i | ]}d |j         v|j         |S )primalsr  r   s     rK   r  z2enable_activation_quantization.<locals>.<dictcomp>  s/     
 
 
 $	8R8RDIt8R8R8RrM   r   r!  r   c                    i | ]
}|j         |S r\   r  r   s     rK   r  z2enable_activation_quantization.<locals>.<dictcomp>  s)        	4  rM   r   z*Skipping quantization of static input %s: Tr  r   r  )inductor_configrt  r   r   rs  r   r   r"  r   r   r|  r<   debugr   rq  r  )
r*  r  r  rh   static_input_namessaved_values_namesr0  r  should_perform_fp8_quantrD   s
             rK   enable_activation_quantizationr    s    	044/	
 	
 	
 	 '	;;:;;;; 
 DClCCC6+	c
U##

 
(4
 
 
 $)444AA!DI!L $.$4$?$?=$?$Q$Q    %" 
, 
,9***t/D/D*y...		F	RRR26DI./(,	%(8(>DIn%JNdi(-.FG@D	%@P@Vdi(-n='+$ W+J
DUVVVVVW WrM   )rh   saved_sym_nodes%tuple[fx.GraphModule, fx.GraphModule]c               	    t          | |          \  }}}}| j                            d          }	g t          t          |	          }
g t          t
          |	          }g t          t          |	          }g t          t          |	          }g t          t          |	          }t          | j        |z   |z   |z   ||d          }t          j                                        }|                    d          D ]}|j        s+t          |j                   t          ||j                   4|rIt!          d |j        D                       r+t          |j                   t          ||j                   t          |          r&t          |j                   |st#          d          t%                      }g }g }|D ]S}t'          |          }|r+|                    |           |                    |           >|                    |           Tt-          | j                  }t/          j        ||          D ]c}d|j        vrt5          |j        d                   |z
  }t7          |d 	          D ]"}||vr|                    ||                    #||z  }d|                                 |                    ||z              g }g }g }D ]}t=          |j                            d          t@                    r|                    |           E|j                            d
d          r|                    |           v|                    |                                                                ||z              tC          |           tE                    D ]K\  }}| k    r@|j                            d
d          s%t#          d| d  dtC                               Lt          | j        |
|z   |z   |z   |z   | fdtG          tC                    tC          |          z   tC          |          z             D             z   d          }t          | j        |z   |z   |z   |z   |z   ||d          }tH          j%        &                    | |          }tH          j%        &                    | |          }tO          |||           ||fS )Nr  r   r!  r   c              3     K   | ]>}|j         t          j        j        j        j        u ot          |j                  d k    V  ?dS r   N)r   r   r   r   wait_tensorr   r   r  rk   s     rK   rn   z+_extract_fwd_bwd_modules.<locals>.<genexpr>  s_       )
 )
  H	2>FF "AG!)
 )
 )
 )
 )
 )
rM   z'backward_state_inputs must not be emptyr   c                    | j         S rH   r  )ss    rK   rp   z*_extract_fwd_bwd_modules.<locals>.<lambda>"  s    16 rM   rq   saved_tensor_with_no_vc_checkFzi=z, no_vc_check_start_idx=z, len(saved_values)=c                ~    g | ]9}|k    r"|t                    k     rt          |          nt          |          :S r\   )r   r/   r.   )rl   r6  no_vc_check_start_idxr*  s     rK   r   z,_extract_fwd_bwd_modules.<locals>.<listcomp>Z  sa     
 
 
  )))a#l2C2C.C.C 0222+A..
 
 
rM   r   )(r)  r   r"  filterr4   r5   r3   r2   r  r   r   distributedis_availabler  r/  r   allr   r$   r   addr   r   	itertoolschainr   r   rs   clearextendr   r   r   r   r  r4  r   _lazy_graph_module_make_graph_moduler  )!r  r*  r  r  rh   r%  r&  r'  r(  placeholdersprimal_inputstangent_inputsfwd_seed_offset_inputsbwd_seed_offset_inputsbackward_state_inputs	bwd_graphdistributed_enabledrD   saved_symbolssaved_sym_nodes_bindingsaved_sym_nodes_derivedsymbolsymbol_bindingsnew_symbolsr  saved_values_with_vc_checksaved_values_no_vc_checksaved_opaque_objectsr6  	fwd_graphr  r  r  s!    `                              @rK   _extract_fwd_bwd_modulesr    s    	!OOO CK/1B  %00M0BBL7fZ667M9vk<889NIv&9<HHIIv&9<HHIGf%7FFG2,&7:PP I  +88::$$$66 P Pz 	PL$)444OTY7777
 ! 	PS )
 )
 Z)
 )
 )
 &
 &
 	P
 L$)444OTY7777%% 	PL$)444( P$%NOOO /9llM     1 1*400 	1f%%%#**40000#**40000 3<3EFFO 7~VV % %	!!"49U#344}D)9)9::: 	? 	?A ''#**?1+=>>>>$
 25LLMMM "$! 4 4dimmE**,<== 	4 ''----Y]]:EBB 	4$++D1111&--d333325MMNNN :;; \**  4%%%9==!@%HH $rrr4Irr_bco_p_prr   3..l"%99OK
 
 
 
 
 L!!C(<$=$==O@T@TT 	
 
 
	
 	 I 3
	
	 	 !		!
  	  	 I &99,	RRJ&99,	RRJ"j*.I   z!!rM   )static_lifetime_input_indicesrh   _joint_inputsr	   r  list[int] | Nonec               
   g }d}| j         j        D ]1}t          |          st          |          st	          |          r|}2|t          d          | j         j        D ],}t          |          s|                    |           ||u r n-t          d |D                       t          |           }t          |           }	|rRt          | j                   d         't          j        d           t          | |||          S t          | d          } t           j        st%          |            t'          |            t)          |            |g }t+          | ||          }
g }g }t,          j                                        d d}d d}d fd}| j         j        D ]}|j        vr|j        dk    r&|j        d |                                 D             v r>|j        t,          j        j        j        j         t,          j        j!        j"        j         t,          j        j!        j#        j         t,          j        j!        j$        j         t,          j        j!        j$        j%        fv rtM          |          r|                    |            ||          r|j'        (                    d          tR          j*        k    r|                    |           : ||          r3|rt          d| d|j                   |                    |           x ||          s|j        dk    rt          d| d          fd|j+        D             }tY          d |D                       r|-                    |           t]          |          s|                    |           
t_          t`          1                    |          2                                          }t_          t`          1                    |          2                                          }t           j3        rtg          | j         |          }||
j4        }tk          | ||||          \  }}|j         6                    tn                     |j         6                    tn                     |r3|	r"tq          | ||ts          |                    \  }}tu          |          }t           j;        rddl<m;}  |||||           t{          |          }t{          |          }t}          |d          }ts          |
j?                  dk    rt}          |d          }||fS )!a  
    Partitions the :attr:`joint_module` in a manner that closely resembles the
    behavior observed in the original ``.forward()`` and ``.backward()`` of the
    callable, i.e., the resulting forward graph contains those operators that
    are executed in the original ``.forward()`` callable passed to
    :func:`aot_function`.

    The default partitioner collects the operators that are between the forward
    inputs and the forward outputs. This helps in finding the tensors which have
    to be stashed for the backward pass. These stashed tensors become the output
    of the generated forward graph. The remaining operators are then placed in
    the backward graph.

    .. warning::
        This API is experimental and likely to change.

    Args:
        joint_module(fx.GraphModule): The joint forward and backward graph. This
            is the result of AOT Autograd tracing.

    Returns:
        Returns the generated forward and backward Fx graph modules.
    Nzlast_node must not be Nonec              3  :   K   | ]}|j         d k    |j        V  dS r   Nr   r   r   s     rK   rn   z$default_partition.<locals>.<genexpr>  s9       $ $tw(/B/B	/B/B/B/B$ $rM   r   zxTrying to unsafely apply AC to a non-functional graph with the default partitioner. Falling back to min-cut partitioner.)r  r  Tis_default_partitionrD   rE   rF   r:   c                    d| j         v p6t          | j                             d          t          j        j                  S )NrA  r   )r   r   r   r   _subclasses
FakeTensorr   s    rK   	is_tensorz$default_partition.<locals>.is_tensor  s;    	) 
ZIMM%  %"3">.
 .
 	
rM   c                n    t          d | j        D                       ot          | j                  dk    S )Nc              3  @   K   | ]}|j         t          j        k    V  d S rH   )r   r   r   rl   r  s     rK   rn   z=default_partition.<locals>.is_multi_output.<locals>.<genexpr>  s,      GGDx//GGGGGGrM   r   )r  r  r   r   s    rK   is_multi_outputz*default_partition.<locals>.is_multi_output  s8    GGDJGGGGG $DJ!#	
rM   c                    |                      d          o-| j        dvo$ p!| j        t          j        j        j        j        uS )NF)impure_random)r   r   )	is_impurer   r   r   r   r   r  r   )rD   r  s    rK   r+  z$default_partition.<locals>.is_impure  sW     NNN// 		 (' U;ei&@&L&TT	
rM   r   c              3      K   | ]	\  }}|V  
d S rH   r\   )rl   kvs      rK   rn   z$default_partition.<locals>.<genexpr>  s7       3
 3
!QA3
 3
 3
 3
 3
 3
rM   r   z.Trying to apply AC on a graph with impure op: z, r   z	Expected z to be a tensorc                &    g | ]}|j         v|S r\   r  )rl   rm   forward_node_namess     rK   r   z%default_partition.<locals>.<listcomp>  s&    UUUAFBT4T4T14T4T4TrM   c              3  4   K   | ]}t          |          V  d S rH   r   rk   s     rK   rn   z$default_partition.<locals>.<genexpr>  s(      77!{1~~777777rM   r  r  rh   )is_impure_noder&   enable_activation_offloadingFr  rV   )@r   r   r  r4   r3   r   r5   r   r$   r   r   r0   warningswarn#min_cut_rematerialization_partitioncleanup_recompute_tagsr   (unsafe_allow_optimization_of_collectivesforce_save_collectivesforce_save_effectful_opsforce_save_bw_mutation_srcclassify_nodesr   r  r  r   r   named_modulesr   r   r   _assert_scalarr   profiler_record_function_enter_new_record_function_enter_record_function_exit_RecordFunctionr   r   r   r%   	MUST_SAVEr  r  r  r   r  r  fromkeysr  _sync_decision_cross_ranksrh   r  r   r   functionalize_rng_opsr   #reordering_to_mimic_autograd_enginer6  ,_activation_offloading.activation_offloadingr9   thread_graphsafe_rng_from_hopsrc   )r  r  r  r  rh   forward_nodes	last_noderD   graph_has_recomputable_opsgraph_has_recomputable_rng_ops	node_infor*  r  r$  r(  r+  backward_usages	fw_module	bw_moduler6  r  r0  s                       @@rK   default_partitionrW  y  sO   @ MI"(  t$$ 	
4(8(8 	<OPT<U<U 	I9:::"(  4   	'  &&&9E # $ $+$ $ $   "6l!C!C%=l%K%K"! W 233A6B ML   7 /.K	    .lQUVVV: -|,,,\***|,,,$,(*%3_ I LO+88::
 
 
 


 
 
 

 
 
 
 
 
$ "( 3& 3&9...7j  TY 3
 3
&44663
 3
 3
 &
 &
 ;IN)1 I9AI5=I4<I4D	
 	
 	
 t 	 ""4(((?4   	9==%%)9)CCC%%%9T?? 	) $ZTZZT[ZZ   %%%y 	D47o#=#= !BT!B!B!BCCCUUUUdjUUU7777777 
	 ""?333d## 	&%%%l3388::;;L4==99>>@@AAO( T1,2DlSS"*&/&K#3''$?  Iy O''7H'IIIO''7H'III! C) 	#8iC4H4H$ $ Iy 8	BB	 * 

	
 	
 	
 	
 	
 	
 	%$'		
 	
 	
 y))Iy))I.yeLLLI
9&''!++29$OOO	irM   g    .Arf  c                    | |j         z  S rH   )itemsize)rf  rq  s     rK   _tensor_nbytesrZ  U  s    5>!!rM   c                j   ddd| j         v r| j         d         }t          |t                    rdS t          |t          t          f          rt          fd|D                       S t          |t                    r-t          fd	|                                D                       S t          |t          j	                  r |          S t          d
t          |           d|            | j        dk    s"| j        t          j        j        j        j        u rdS t          d|  d          )Nr   objectrF   r}   c                    t          | t          j                  sdS t          t	          |                                 d          | j                  S )Nr      fallback)r   r   rN  rZ  r    rf  rq  r   s    rK   object_nbytesz_size_of.<locals>.object_nbytesZ  sB    !U\** 	1i		DAAA17KKKrM   r   r&   c              3  .   K   | ]} |          V  d S rH   r\   )rl   rm   rb  s     rK   rn   z_size_of.<locals>.<genexpr>g  s-      55A}}Q''555555rM   c              3  4   K   | ]\  }} |          V  d S rH   r\   )rl   rC  rm   rb  s      rK   rn   z_size_of.<locals>.<genexpr>i  s1      @@DAq}}Q''@@@@@@rM   zUnknown metadata type z	 on node r   r   r   zO didn't have `val` metadata; we should always have `val` metadata on the nodes.)r   r\  rF   r}   )r   r   r   r  r   sumr  itemsr   rN  r   r   r   r   r   r   rB  r   )rD   r   rb  s     @rK   _size_ofrg  Y  sV   L L L L
 	ic<(( 
	&1 dE]++ 	&5555555555T"" 	&@@@@CIIKK@@@@@@U\** 	& =%%%NDIINNNNOOOw*uy~/L/T T Tq
eeee  rM   c           	     .   ddl m}  |t                    }| j        D ]'}|j        dk    r||j        j        xx         dz  cc<   (t                              dt          |
                                t          j        d          d                     d S )Nr   )r   r   r&   %sTrr   reverse)collectionsr   r}   r   r   r   rW   r<   r  rs   rf  r   
itemgetter)r   r   cntrD   s       rK   
_count_opsro  v  s    ''''''%+c**C + +7o%%$%%%*%%%HHT6#))++8+>q+A+A4PPPQQQQQrM   !list[torch._ops.OpOverloadPacket]c                 v   g } t          t          j        j                  D ]}t	          t          j        j        |          }t          |t          j        j                  sA|                                D ]A}t	          ||          }t          j	        j
        |j        v r|                     |            nB| S rH   )dirr   r   r   r   r   r   OpOverloadPacket	overloadsr   	pointwiser   r   )r   	attr_nameopoverloadpacketoverloadop_overloads        rK   pointwise_opsrz    s    -/C(( 
 
	"59>9==*EJ,GHH 	(2244 	 	H!"2H==Ky"k&666

+,,, 7
 JrM   r   tuple[Any, ...]	depth_maprf   list[tuple[fx.Node, int]]c                    fd| D             }t          |                                t          j        d          d          S )Nc                j    i | ]/}t          |t          j        j        j                  &||         0S r\   )r   r   r   rD   r   )rl   argr|  s     rK   r  zsort_depths.<locals>.<dictcomp>  sE        #z#ux}?Q/R/RYs^  rM   r&   Trj  )rs   rf  r   rm  )r   r|  
arg_depthss    ` rK   sort_depthsr    sY       '+  J *""$$(*=a*@*@$OOOOrM   gmc                4  	
 t          j                    
i 	| j                            d          D ]}
                    |	fd          	|<   d t          | j        j                  D             d	
fd	}t          t          t          | j        j                            }d
}t          j        }|D ]"}|j        D ]}|         |k     r
|         }|}#|| S t          | j        j                  d
|                  D ]:}|j        dk    r-|j        t          j        j        j        j        u r ||           ;t          | j        j                  |         d
         D ]} ||           t          j                             | 
          }|S )a  
    This pass finds the first bwd node in the graph (by looking at users of
    tangents) and then reorders the graph by walking from this node to all the
    way to the end of the graph. At each op in this traversal, we insert this op
    in a new graph and try to bring only the relevant subgraph from the other
    non-bwd edges relevant for this op. This closely mimics the behavior of
    autograd engine.

    Why is this pass required in the first place?

    This is an artifact of how partitioners work today. The starting point of
    partitioner is a joint graph, which is fwd and then bwd graph. In the case
    of checkpointing, we keep portions of fwd graph in their original place in
    the joint graph, while obtaining a bwd graph. As a result, the resulting bwd
    graph has copies of recomputed fwd subgraphs followed by the original bwd
    graph. If we run this naively, this leads to bad memory footprint, because
    the fwd subgraphs are live for way longer duration than necessary. This pass
    reorders the operations such that we prioritize the ops for the original bwd
    graph while only realizing those ops from the fwd graph that are necessary
    at any given point in the graph.
    r   r!  c                    |          S rH   r\   r   s    rK   rp   z5reordering_to_mimic_autograd_engine.<locals>.<lambda>  s    A rM   c                    i | ]\  }}||	S r\   r\   rl   r5  rD   s      rK   r  z7reordering_to_mimic_autograd_engine.<locals>.<dictcomp>  s    BBB93T3BBBrM   rD   rE   rF   r+  c                X   | g}t                      }t          |          dk    rO|                                } | |v s| v r0|                    |            || j        z  }t          |          dk    Ot          |fd          }|D ]}                     | fd          | <   d S )Nr   c                    |          S rH   r\   )rm   orders    rK   rp   zSreordering_to_mimic_autograd_engine.<locals>.insert_node_in_graph.<locals>.<lambda>  s    %( rM   rq   c                    |          S rH   r\   r   s    rK   rp   zSreordering_to_mimic_autograd_engine.<locals>.insert_node_in_graph.<locals>.<lambda>  r   rM   )r$   r   r  r  all_input_nodesrs   r   )rD   	cur_nodesinsertable_nodesr   r   r  s      rK   insert_node_in_graphzAreordering_to_mimic_autograd_engine.<locals>.insert_node_in_graph  s    F	0:)nnq  ==??D'''43;;  &&& --I )nnq   ""28J8J8J8JKKK$ 	D 	DD!++D2B2B2B2BCCCII	D 	DrM   Nr   rD   rE   rF   r+  )r   r   r   r"  r   r  r   r  r  r5   mathinfr  r   r   r   r   r   r   r   GraphModule)r  rD   r  r  first_node_in_bwdminimum_ordertangentr  new_gmr   r   r  s            @@@rK   rL  rL    s   . 

I"$C ##}#55 @ @''.>.>.>.>??D		BB	"(.(A(ABBBED D D D D D D D& &bhn==>>NHM! ) )M 	) 	)DT{]** %d$(!	)  	 RX^$$%?u->'?%?@ ' '7o%%$+9M9U*U*U  &&&RX^$$U+<%=%?%?@ # #T"""" X!!"i00FMrM   rU  torch.fx.GraphModulerV  fw_nodebw_nodedevicetorch.device	rng_countlast_fwd_inputlast_bwd_input#tuple[torch.fx.Node, torch.fx.Node]c                   |j         }|t          d          | j        }	|j        }
t          j        j        j        }| j                            |          5  | j                            d|           }t          |          |j
        d<   |}ddd           n# 1 swxY w Y   |j                            |          5  |j                            d|           }t          |          |j
        d<   |}ddd           n# 1 swxY w Y   t          |j                  }||d<   | j                            |          5  |	                    d||j        g|j        R |          }ddd           n# 1 swxY w Y   |                    |           |	                    |           t          |j                  }||d<   |
                    |          5  |
                    d||j        g|j        R |          }|                    |           |
                    |           ddd           n# 1 swxY w Y   ||fS )	a%  
    Note [CUDA Graph Safe RNG Functionalization]

    CUDA Graph capture doesn't work with get_rng_state and set_rng_state because these functions operate on CPU values,
    while CUDA Graph RNG capture uses on-device CUDA tensors. To solve this, we use graphsafe_set_state with a
    CUDA Generator registered to the CUDA Graph before capture begins. graphsafe_set_state updates the generator's pointer
    to reference a different GeneratorImpl, ensuring subsequent calls are correctly forwarded to the desired generator
    (and its cuda-tensor RNG state during graph capture).

    For each RNG operation's forward/backward pair:

    - We create two generators initialized with identical values
    - Each forward and backward call advances its respective generator equally
    - This keeps generators synchronized so forward and backward operations use matching RNG values

    When forward is called multiple times before backward (causing desynchronization):

    - We save the forward RNG state
    - We update the backward Generator's state before executing backward

    Before each CUDA Graph replay, replay_prologue updates captured RNG pointers with current states, ensuring backward Generator
    changes are reflected during replay.

    This function modifies both forward and backward computation graphs by:

    Creating RNG state placeholders for both passes
    Updating the forward node to use graph-safe RNG state
    Updating the backward node to use graph-safe RNG state

    For more details: https://github.com/pytorch/pytorch/issues/113541
    Nzdevice_idx must not be Nonefwd_rng_state_r   bwd_rng_state_	rng_stater   r   r   )indexr   r   r   _prims	rng_primsgraphsafe_run_with_rng_staterE  r   r6   r   r  r   create_noder   r   r  r  inserting_before)rU  rV  r  r  r  r  r  r  
device_idxfw_graphbw_graphr  fwd_rng_statebwd_rng_state	fw_kwargsfunctional_fw_node
bwd_kwargs
rng_outputs                     rK   %apply_graphsafe_rng_functionalizationr    s^   R J:;;;HH#(<#9#V  
	(	(	8	8 ' '!334PY4P4PQQ$?
$K$K5!&' ' ' ' ' ' ' ' ' ' ' ' ' ' ' 
	(	(	8	8 ' '!334PY4P4PQQ$?
$K$K5!&	' ' ' ' ' ' ' ' ' ' ' ' ' ' ' W^$$I*Ik		(	(	1	1 
 
%11(.07<00	 2 
 

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 !!"4555    gn%%J+J{		"	"7	+	+ % %))(.07<00	 * 
 

 	%%j111G$$$% % % % % % % % % % % % % % % >))sI   7BB!B?7DD	D (E44E8;E8AH55H9<H9num_sym_nodesc                6  ' t          j                    }dd}d d	'd!d} ||           } ||          } ||          }	i }
| j        j        D ]}t	          |          rtt          |j        d          r_t          j        j	        |j        j
        v rB|j        |vs	|j        |	vrV||j                 }||j                 }|	|j                 }||d|
|<   t          j        j        j        }t          j        j        j        }d }|j                            d          D ]}d|j        v r|} n|t#          d          g }t%          t'          |j                            d                              }t%          t'          |j                            d                              }t)          'fd|
                                D                       }|                    t          j        d                     t1          |          dk    }t          j        j        }t4          j        o| o|j         p|j        j        }t?          |
                                          D ]V\  }}|d         }|d         } '|          }|j        }|j        }|r'|%|j         dk    rtC          ||||||||          \  }}X|"                    |          5  |#                    d||j        g|j$        R |j%                  }|#                    dtL          j'        |dfi           } ||          |j(        d<   |#                    dtL          j'        |dfi           } tS          j)        |j(                  | _(        |*                    |            |+                    |           |,                    |           d d d            n# 1 swxY w Y   |"                    |          5  dt%          |           }!|-                    |!          }" ||          |"j(        d<   d d d            n# 1 swxY w Y   |"                    |          5  |#                    d||"|j        g|j$        R |j%                  } |*                    |            |+                    |           d d d            n# 1 swxY w Y   X|rt%          t]          |j                            d                              }#|#j$        d         }$t1          |$          |z
  }%|$d |%         t_          |          z   |$|%d          z   }&|j        0                    |&           |j        +                    |#           |1                                 |1                                 ||fS )"Ngmodr   rF   r  c                    i }| j         j        D ]I}|j        dk    r<t          |j        d          r't
          j        j        |j        j        v r
|||j	        <   J|S )Nr   r   )
r   r   r   r   r   r   r   r   r   r   )r  random_nodesrD   s      rK   get_rng_opsz*functionalize_rng_ops.<locals>.get_rng_opsb  sd    +-J$ 	/ 	/D?**DK00 +I59III*.TY'rM   rD   rE   torch.device | Nonec                    d| j         vrdS | j         d         }t          |t                    s|f}|D ]5}t          |t          j                  r|j        j        dk    r	|j        c S 6t          j        d          S )zV
        Check the example value of the node outputs to find the device type.
        r   Ncudacpu)r   r   r   r   rN  r  r   )rD   
candidates	candidates      rK   
get_devicez)functionalize_rng_ops.<locals>.get_devicem  s     	!!4Yu%
*e,, 	'$J# 	, 	,I)U\22 ,#(F22$++++|E"""rM   r  rd  c                \   ddl m}  |            }|t          d          |5  | H| j        dk    r=|                    t
          j                                                  cd d d            S |                    t          j                              cd d d            S # 1 swxY w Y   d S )Nr   )detect_fake_modezfake_mode must not be Noner  )torch._guardsr  r   r   from_tensorr   r  get_rng_state)r  r  	fake_modes      rK   get_sample_rng_statez3functionalize_rng_ops.<locals>.get_sample_rng_state  s2   222222$$&&	 !=>>> 	@ 	@!fkV&;&; ,,UZ-E-E-G-GHH	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ (()<)>)>??	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@ 	@s   >B!/%B!!B%(B%r   )fwdbwdr   r!  r  zaCouldn't find tangent node in graph inputs. This is unexpected, please file a bug if you see thisc              3  :   K   | ]} |d                    V  dS )r  Nr\   )rl   	node_pairr  s     rK   rn   z(functionalize_rng_ops.<locals>.<genexpr>  sC        )2

9U#$$     rM   r  r&   r  r  r  r   r  r   r   rng_state_output_r   )r  r   rF   r  )rD   rE   rF   r  )r  r  rF   rd  )2r  countr   r   r   r   r   r   r   r   r   r   r  r  run_and_save_rng_staterun_with_rng_stater"  r   r#  r  r$   valuesdiscardr  r   rs  r   graphsafe_rng_functionalizationfallback_randomtest_configs*graphsafe_rng_func_ignores_fallback_randomr  r   r  r  r  r   r   r   r   r   copyr  r  r   r   r$  r   r   	recompile)(r  rU  rV  r  uidr  r  joint_graph_rng_opsfw_graph_rng_opsbw_graph_rng_opsrecomputable_rng_ops_maprD   	base_noder  r  run_and_save_rngr  bw_tangent_start_nodefw_rng_state_outputsr  r  devicesmulti_cuda_devices
ind_config'use_rng_graphsafe_rng_functionalizationr  r  r  r  r  r  stater  
state_namebw_rng_state_nodefw_output_node
fw_outputssym_node_start_idxr   r  s(                                          @rK   rK  rK  G  sf   2 /

C	 	 	 	# # # #$	@ 	@ 	@ 	@ &+l33"{9--"{9--!"( S S4  	SV,,	S 	1T[5EEE
 y 000DIEU4U4U+DI6I&ty1G&ty1G:A'2R2R$Y/|-D/B **m*<<  	!!$(!E " $o
 
 	
 +-(9?#=#=#=#O#OPPQQN(9?#=#=#=#O#OPPQQN    6N6U6U6W6W    G OOEL''((( W) 'J. 	
""	
 ** R&Q , !**B*I*I*K*K L L P- P-	9E"E"G$$?? 4G	-"v%%-R	. 	.*NNN **733 !3 !3%-%9%9#$     #> &: 	& 	&" !,,#$,a0	 -   %9$8$@$@
5!%11#$*  2  
 #')GL"9"9
--j999##G,,,$++E222C!3 !3 !3 !3 !3 !3 !3 !3 !3 !3 !3 !3 !3 !3 !3H **+@AA M M<c<<
$,$8$8$D$D!0D0DV0L0L!&u-M M M M M M M M M M M M M M M
 **733 - -%11#& * ! 
 #> 2 
 

 --j999##G,,,- - - - - - - - - - - - - - -&  
3d9?#=#=#=#J#JKKLL#(+
 __}<***+())*+,,-. 	
 	w'''"">222is8   C'OO	O	(;P//P3	6P3	AR33R7	:R7	c                    | j         j        D ]Y}t          |j        t          j        j                  r3|j        j        dk    r#t          |          st          j
        |j        d<   ZdS )z
    By default, the partitioner is not allowed to recompute collectives
    unless they come from a user-annotated AC region.
    See Note [Recomputing collectives in the partitioner]
    r   r   N)r   r   r   r   r   r   r  r   r   r%   rH  r   )r  rD   s     rK   r=  r=  +  st     "( @ @t{EJ$9::	@%);;;"4(( < &6%?DIk"@ @rM   c                    dfd| j         j        D ]:}t          |          r)t          |          st	          |          s |           ;dS )a\  
    Force save outputs from with_effects nodes wrapping effectful ops.

    Effectful ops (registered via _register_effectful_op) should not be recomputed
    because they may have arbitrary global side effects (I/O, RNG state, collectives,
    etc.). We mark the tensor outputs of with_effects as MUST_SAVE to prevent
    recomputation of the effectful op.

    The with_effects node returns a tuple (token, result). We recursively find all
    leaf outputs extracted via getitem and mark them as MUST_SAVE. Since these are
    saved, the with_effects op doesn't need to be recomputed in backward.
    rD   rE   rF   r+  c                    | j         D ]h}|j        t          j        u rS |           t	          |j                            d          t          t          f          st          j
        |j        d<   id S )Nr   r   )r  r   r   r   r   r   r   r   r  r%   rH  )rD   r  mark_getitem_outputss     rK   r  z6force_save_effectful_ops.<locals>.mark_getitem_outputsH  sx    J 	H 	HD{h...$$T***!$)--"6"6FF H-=-GDIk*		H 	HrM   Nr  )r   r   r   r   r
  )r  rD   r  s     @rK   r>  r>  :  s    H H H H H H "( ' 'D!!	'"4((	' )..	'
 ! &&&' 'rM   c                   t                      }t          | j        j                  D ]}|j        dk    r|j        t          j        j        j	        j
        u }|rmt          |          r |                    |j        d                    t          |          r.|j        d         |v rt          j        |j        d         j        d<    d S d S )Nr   r   r&   r   )r$   r  r   r   r   r   r   r   r   r   r   r  r  r   r  r%   rH  r   )r  has_mutation_in_bwrD   is_copy_s       rK   r?  r?  X  s     5?LL+122  7h;%)."6">> 	+D11 5"&&ty|444*400 LTYq\EW5W5W1A1K	!!+. EE! rM   c                    | j         t          j        k    rdS | j        d         }t	          |          t
          j        urt          dt	          |                     d|j        vo
| j	        dk    S )NFr   z#expected parent to be fx.Node, got rA  r   )
r   r   r   r   r   r   r   r   r   r   )rD   parents     rK   is_getitem_of_multi_outputr  o  sk    {h&&&uYq\FF||27""Q4<<QQRRR+J?0JJrM   r   c               X   | j         j        D ]}t          |          r|j        D ]S}t          |          rBd|j        v r9d|j        v r0|j        d         |j        d         k    rt
          j        |j        d<   T|j                            dd          r2t          d |j        D                       st
          j        |j        d<   d|j        vrWt          d |j        D                       r9t          |          rd|j
        d         j        v s|rt
          j        |j        d<   | S )a  
    If there are two consecutive checkpointed blocks with no operator in
    between, we would still want to stash the tensor at the boundary of
    checkpointed blocks. The following pass makes the last output node
    non-recomputable to allow for that.
    ac_graph_idr   has_backward_hookFc              3  4   K   | ]}t          |          V  d S rH   r   r'  s     rK   rn   z)cleanup_recompute_tags.<locals>.<genexpr>  sC       E E)-t$$E E E E E ErM   c              3  4   K   | ]}t          |          V  d S rH   r  r'  s     rK   rn   z)cleanup_recompute_tags.<locals>.<genexpr>  s*      @@TN4((@@@@@@rM   r   )r   r   r   r  r   r%   rH  r   r   r  r   )r  r   rD   r  s       rK   r;  r;  x  so    "( +@ +@$ *	@
 H H"4((H%22%22	-049]3KKK-=-GDIk*y}}0%88 D E E15E E E B B D& *:)C	+&**@@TZ@@@@@ + +400	 + 6CdiPQlFW5W5W$ 6X &6%?DIk"rM   rS  min_cut_optionsdont_ban)tuple[list[fx.Node], OrderedSet[fx.Node]]c                  <=>?@ABCDEFGHIJK t                      t                      It          rZt          d | j        D                       }|t          d Ij        D                       z
  }t
                              d|           dbd	=dbd
>db=>IfdA	 dd lGn"# t          $ r}t          d          |d }~ww xY wdcAIfdCddCIfd}dcAfdBdeBIfd} Gj
                    Ht                      <dfdg<HIfd}	| j        D ]}
|
j        dk    r|
j        v ra|
j        vr,H                    |
j        dz   dt           j        d           n,H                    |
j        d z   dt           j        d!           yt%          |
          r,H                    |
j        d z   dt           j        d"           t'          |
          r |	|
d#           nt)          |
          r |	|
d$            ||
          }                    |
          r|r |	|
|           d%|
j        vod&|
j        vp.d%|
j        v o%t/          |
j        d%         t0          j                   }t5          |
          rt7          t9          |
                    }d }n^|rHt/          |
j                            d%          t<          t>          f          rd'}d }n#t           j        }d(}n ||
j                   \  }}|rH|t           j        k    s|tB          k    r-H                    |
j        d z   |
j        dz   |d)|            n(H                    |
j        d z   |
j        dz   |*           |
j"        D ]5}H                    |
j        dz   |j        d z   t           j        d+           6dhAfd0}j#        rj$        D ]}fd1|j"        D             }fd2|j"        D             }tK          |          dk    r ||tM          |                    }tO          |j"                  D ]}                    |          rz(                    |          |k    ra A||          rU|<v rAt
                              d3|(                    |          ||(                    |                      |	|           j)        rt                      }| j        D ]w}                    |          s(                    |          |fg}(                    |          }tK          |          dk    rtU          j+        |          \  }}||v r0|,                    |           (                    |          |d4z   k    rctK          |          dk    rPt
                              d5||(                    |          (                    |                      |	|           nm|j"        D ]Q}                    |          r: A||          r.|<vr*tU          j-        |(                    |          |f           RtK          |          dk    y	  Gj.        Hd6d          \  }}n# Gj/        $ r}ta          tb          j2                  }d }d @| j3        } 	 | r| 4                    d7d8d89          ntk          |           @tm          d:d; @fd<=           to          d>d?          }tq          |d@          5 }!|!9                    @           d d d            n# 1 swxY w Y   n# tt          $ r}dA| dB}Y d }~nd }~ww xY wdC;                    Gj<        j=        >                    H                    ?tm          d:dD ?fdE=           t          H          }"|"r
i }#d6g}$didG}%|"D ]!\  }&}'}(|$@                    |'           |&d6k    r5 |%|'          })|#A                    |)g           @                    |(           W|'dk    r5 |%|&          })|#A                    |)g           @                    |(            |%|&           |%|'          k    r5 |%|&          })|#A                    |)g           @                    |(            |%|&          }* |%|'          }+|#A                    |+g           @                    dH|*            #g },|#B                                D ];\  }-}.|,@                    dI|- dJ           |.D ]}/|,@                    dK|/            <dC;                    |,          }0dL;                    |$          }1t          H          \  }2JJrtm          d:dM JfdN=           |rdO| dCnd}3|2r	|3dP|2 dCz  }3d}4|rdQ}4t          dR|0 dS|1 dC|3 |4           |t
                              dT           t
                              dUt          GHfdV                     t          H            d }~wtt          $ rW t
                              dT           t
                              dUt          GHfdW                     t          H            w xY w|\  }5Ft                      }6HfdX|5D             D ]'\  K}7|6E                    FKfdY|7D                        (t                      }8|6D ]_\  }9}:|9d dZ         |:d d[         k    r%t          d\|9d dZ          d]|:d d[                    |9d dZ         }-|8,                    |-           `t          |           Dd^ t          | j                  D             Et          Dfd_|8D             Efd`a          };|;<fS )jNc              3     K   | ]=}|j         d k    t          |j        d          "t          |j        j                  V  >dS )r   _overloadpacketN)r   r   r   r   r  r   s     rK   rn   z solve_min_cut.<locals>.<genexpr>  sZ       &
 &
w/))gdkCT.U.U) +,,))))&
 &
rM   c              3  4   K   | ]}t          |          V  d S rH   )r   rl   r6  s     rK   rn   z solve_min_cut.<locals>.<genexpr>  s9       4
 4
CFF4
 4
 4
 4
 4
 4
rM   z&Ops banned from re-materialization: %sarE   brF   r:   c                   |j         t          j        j        j        k    rdS |j        d         }t          j        j                            |          \  }}|D ]2}|j	        |         }| |u r dS t          |t                    r| |v r dS 3dS NFr   T)r   r   r   r  auto_functionalizedr   _higher_order_opsauto_functionalizeget_mutable_argsr   r   r  )r  r  
mutable_opmutable_arg_namesrC  r   r  s          rK   !can_fuse_into_auto_functionalizedz8solve_min_cut.<locals>.can_fuse_into_auto_functionalized  s    8uy-AAA5VAY
 #6GG
 
	

 & 	  	 D(4.CCxxtt#t$$  8844urM   c                    |j         t          j        j        j        k    rdS |j        d         }|D ]/}|j        d         }|t          d          ||         }| |u r dS 0dS )NFtensors_to_cloner   zkwargs must not be NoneT)r   r   r   r  r  r   r   )r  r  r  r   r   r  s         rK   .can_fuse_into_triton_kernel_wrapper_functionalzEsolve_min_cut.<locals>.can_fuse_into_triton_kernel_wrapper_functional  s|    8uy-NNN5H%78% 	 	D(8,F~$%>???,CCxxtt urM   c                H   t          |          t          j        k    rdS  | |          rdS  | |          rdS | j        t          j        u r*| j        d         j        t          j        j	        j
        u rdS                     |           o                    |          S )NTr   F)r8   r   catr   r   r   r   r   r   r  r  rL   )r  r  r  r  op_typess     rK   rL   z!solve_min_cut.<locals>.is_fusible  s     1))4,,Q22 	499!Q?? 	4H(((q	 y%FG G
 5""1%%@(*=*=a*@*@@rM   r   zANeed networkx installed to perform smart recomputation heuristicsrD   c                z                        |           rdS t          | g          }t          |          dk    r|                                }|j        D ]P}                    |          s ||          s dS                      |          r|                    |           Qt          |          dk    dS r  )rS   r$   r   r  r  rx   r  )rD   r  curr  rL   rS  r  s       rK   is_materialized_backwardsz0solve_min_cut.<locals>.is_materialized_backwards   s    D!! 	5v&&	)nnq  --//C	 ( ( //55  jjd>S>S  44##D)) (MM$''' )nnq   urM   r   c                @   | j         dk    rdS | j        t          j        u rdS | j                            dd          t          j        k    rdS t          j	        r
                    |           rdS | j        t          j        j        t          j        j        fv rdS j        r                    |           sdS n?                    |           rdS                     |           rdS t'          |           rdS j        r; |           r0t*                              d	| t/          | j                             d
S | j        dk     r| j        t          j        k    rdS j        r8t9          d | j        D                       }t=          |           }|dz  |k     rdS dS )zRReturns reason string if node should be banned from recomputation, None otherwise.r   Nr   zmarked MUST_SAVEznot in recomputable allowlistz	random opzcompute intensive opznon-builtin opzmaterialized backwards: %s %szmaterialized in backwardi  ztoo far from backwardc              3  h   K   | ]-}t          |t          j                  t          |          V  .d S rH   )r   r   r   rg  r  s     rK   rn   zBsolve_min_cut.<locals>.should_ban_recomputation.<locals>.<genexpr>A  sM       % % !*Q2H2H%% % % % % %rM   r   zreduction op)r   r   r   r   r   r   r%   rH  r   recompute_viewsrS   r   lift_fresh_copyr   
lift_freshr   rU   rQ   rO   r   r   r<   r  r   r  dist_from_bwmax_dist_from_bwr   re  r   rg  )rD   input_tensors_sizeoutput_sizer  r  r  s      rK   should_ban_recomputationz/solve_min_cut.<locals>.should_ban_recomputation  s   7o%%4;(***49==d++/?/III%%! 	h&6&6t&<&< 	4;4/79PQQQ42 		(++D11 7667 !!$'' #"{,,T22 .--(.. ('' 7 	.<U<U=
 =
 	. II5tU4:=N=NOOO-- $$):V=T)T)T** + 	&!$ % %%)Y% % % " " #4..KQ!333%~trM   c                d      j         dk    rdS t           fd j        D                        S )Nr   Tc              3  0   K   | ]} |          V  d S rH   r\   )rl   r  rL   rD   s     rK   rn   z9solve_min_cut.<locals>.is_materialized.<locals>.<genexpr>M  s/      EE$zz$--EEEEEErM   )r   r  r  )rD   rL   s   `rK   is_materializedz&solve_min_cut.<locals>.is_materializedI  sA    7m##4EEEEE$*EEEEEEErM   rh   ra   tuple[float, str | None]c           
        t           j        r| |v rdS t          |           }t           j        r#                    |           rt
          j        dfS t          | j        d         t                    r.t          | j        d         t          j                  s	t          dfS t          |dt          t          | j        d          d          z  z            } |           r|dfS |d	z  dfS )
zReturns (weight, cannot_save_reason).

        cannot_save_reason is None for finite weights, or a string explaining
        why the node cannot be saved for infinite weights.
        r  zview op (recompute_views=True)r   z$SymFloat (non-SymInt symbolic value)g?rx  r&   N   )r    treat_parameters_as_free_to_saverg  r  rS   r  r  r   r   r   r   r   INT_INFr}   r<  r>  r  )rD   rh   mem_szr%  r  s      rK   get_node_weightz&solve_min_cut.<locals>.get_node_weightO  s     3	3337$! 	>h&6&6t&<&< 	> 8===di&55 	Gdi.== G FFF cST%6!<!<a@@@A
 
 ?4   	$4<A:t##rM   r  reasonr   c                                        |           rdS | v rDt          | j        t          j        j                  o| j        j        dk    }t          j        s|sdS t          |           rdS d| j
        v r't          | j
        d         t          j                  rdS                     |                                d| j        dz   t          j        |rd| nd           d	S )
NFr   r   r   _inzcannot recompute: zcannot recomputecapacityr-  T)rS   r   r   r   r   r  r   r   r<  r   r   r   r  add_edger   r  r  )rD   r-  is_collectivebanned_nodesr  nx_graphr  s      rK   ban_recomputation_if_allowedz3solve_min_cut.<locals>.ban_recomputation_if_allowedz  s   D!! 	58 4;
(=>> @K)-??  > m u $ 	5DI*TYu-=u~"N"N5
 	IX4:R0000@R	 	 	
 	
 	
 trM   r   _outsinkz;must be available for backward: input required for gradientr0  r/  z3must be computed in backward: required for gradientz+must recompute: marked by checkpoint policyzprimal inputzforward RNG seedr   rA          znon-tensor outputzcannot save: )r1  zdata dependencystart_nodesr_   	max_ranger}   c                   g }| D ]-}t          j        |
                    |          |df           .t          |          dk    rt          j        |          \  }}}|s
                    |          S |j        D ]l}
                    |          rU
                    |          |k    r1
                    |          | 	||          f}||vrt          j        ||           mt          |          dk    |S )z
        Finds the first unfusible node in the chain of nodes starting from
        `start_nodes` and returns its position.
        Tr   )heapqheappushr   r   heappopr  rx   )r:  r;  sorted_nodesrm   rC  rD   node_is_fusibler  r   rL   rS  s            rK   find_first_unfusiblez+solve_min_cut.<locals>.find_first_unfusible	  s4   
 9; 	O 	OAN<)*@*@*C*CQ)MNNNN,!##',}\'B'B$At_" 4 --d333
 
: 
:++D11 	: --d33i?? !..t44"
4..6C
 ,..|S999 ,!## rM   c                d    g | ],}                     |                              |          -S r\   )rx   r   rl   r  rS  s     rK   r   z!solve_min_cut.<locals>.<listcomp>	  sK       ++D11&&t,,  rM   c                >    g | ]}                     |          |S r\   )rx   rD  s     rK   r   z!solve_min_cut.<locals>.<listcomp>$	  s<       I4L4LT4R4R  rM   z1used above/below fusible %s:(%s) -> %s -> %s:(%s)rx  ztoo long %s %s %s %sr   FTr  r  c                     dddS )Nmin_cut_failed_fx_graphr  r  r\   r\   rM   rK   rp   zsolve_min_cut.<locals>.<lambda>	  s    5 (% % rM   c                      S rH   r\   )fx_graph_strs   rK   rp   zsolve_min_cut.<locals>.<lambda>	  s    < rM   r  min_cut_failed_graphz.txtwz(failed to write: )
c                     dddS )Nmin_cut_failed_edge_listr  r  r\   r\   rM   rK   rp   zsolve_min_cut.<locals>.<lambda>	  s    2$! ! rM   c                      S rH   r\   )edge_list_strs   rK   rp   zsolve_min_cut.<locals>.<lambda>	  s    } rM   	node_namec                n    dD ]1}|                      |          r| d t          |                    c S 2| S )N)r/  r7  )endswithr   )rR  suffixs     rK   get_base_namez$solve_min_cut.<locals>.get_base_name	  sO    - 9 9F ))&11 9(CKK<88889  rM   zdepends on z  :z    - z -> c                     dddS )Nmin_cut_failed_svgr  r  r\   r\   rM   rK   rp   zsolve_min_cut.<locals>.<lambda>	  s     4$,) ) rM   c                      S rH   r\   )svg_contents   rK   rp   zsolve_min_cut.<locals>.<lambda>	  s    { rM   zFX graph dump: zMin-cut graph visualization: z[Production debugging: Use tlparse to extract debug artifacts (min_cut_failed_fx_graph, min_cut_failed_edge_list, min_cut_failed_svg)]
a  AOT Autograd failed to partition the joint forward-backward graph.

The partitioner determines which intermediate values to save from the forward pass vs recompute in the backward pass. This error means a value is required for backward, but cannot be saved AND cannot be recomputed.

This is a bug in PyTorch. Please file an issue at https://github.com/pytorch/pytorch/issues

Nodes involved in the conflict:
z

[For PyTorch developers: one of the above constraints is wrong. Either the node should be recomputable, saveable, or not required for backward.]

[Debug: min-cut path] z-Failed to compute min-cut on following graph:ri  c                 h    d                      j        j                                                S NrM  join	readwriteedgelistgenerate_edgelistnxr5  s   rK   rp   zsolve_min_cut.<locals>.<lambda>	  &    		","7"I"I("S"STT rM   c                 h    d                      j        j                                                S r]  r^  rc  s   rK   rp   zsolve_min_cut.<locals>.<lambda>	
  re  rM   c              3  ,   K   | ]}||         fV  d S rH   r\   )rl   rm   r5  s     rK   rn   z solve_min_cut.<locals>.<genexpr>
  s,      88Q$888888rM   c              3  (   K   | ]}|v |fV  d S rH   r\   )rl   r.  non_reachableus     rK   rn   z solve_min_cut.<locals>.<genexpr>
  s1      AAa=.@.@q!f.@.@.@.@AArM   znode_in[:-3]=z != node_out[:-4]=c                    i | ]\  }}||	S r\   r\   r  s      rK   r  z!solve_min_cut.<locals>.<dictcomp>
  s    HHHic4cHHHrM   c              3  (   K   | ]}|         V  d S rH   r\   rl   rD   name_to_nodes     rK   rn   z solve_min_cut.<locals>.<genexpr>!
  s(      22d	222222rM   c                    |          S rH   r\   )r   node_idxs    rK   rp   zsolve_min_cut.<locals>.<lambda>!
  s    (1+ rM   rq   )r  rE   r  rE   rF   r:   rV   )rD   rE   rF   r   )rD   rE   rh   ra   rF   r&  )r  )rD   rE   r-  r   rF   r:   )r:  r_   r;  r}   rF   r}   )rR  r   rF   r   )Jr$   get_default_op_listr;   r   rC   r<   r  networkxImportErrorr   DiGraphr   rc   rd   r2  r   r  r  r   r4   r3   rx   r   r   r   rN  r   r=  r   r   r   r   rh   r*  r  r   ru   r   r<  r   r   r   r=  r?  r  r>  minimum_cutNetworkXUnboundedr:   r   handlersowning_moduler  r   r   _get_unique_pathopenwrite	Exceptionr_  r`  ra  rb  _find_infinite_capacity_pathr   
setdefaultrf  visualize_min_cut_graphr   r  r   get_name_to_noder  rs   )Lr   rS  r  r  joint_module_opsops_ignoreder"  r,  r6  rD   
ban_reasonis_non_tensor_nodeweightcannot_save_reasonr  rB  	used_nodeordersfw_usersfirst_unfusible_usevisited
start_nodefusiblestart_orderrC  r  	cut_value	partitionunbounded_excstructured_tracing_enabledfx_graph_filer  finf_pathnode_constraintsraw_path_nodesrV  	from_nodeto_noder-  base	from_baseto_baseconstraint_linesrR  constraintscconstraints_strraw_path_strsvg_pathlocal_files_msgtlparse_msg	reachablecutsetnbrs	cut_nodesnode_innode_outr*  r4  r  r  rQ  rI  rL   r%  r  rp  rr  ri  rd  r5  r  r[  rj  sL    ```                                                        @@@@@@@@@@@@@@@@rK   solve_min_cutr    sD    <<"$$H 	H% &
 &
#)&
 &
 &
 
 

 ' 4
 4
$54
 4
 4
 *
 *
 
 	9;GGG   (   A A A A A A A A&   O
 
	
       9 9 9 9 9 9 9 9vF F F F F F&$ &$ &$ &$ &$ &$ &$P rz||H(2L         @ ! V V7h9...9555!!I&!XX	 "     !!I%!XP	 "    $ 	
 	E!D	     d 	C((~>>>> && 	C((/ABBB
 .-d33
##D)) 	;j 	;((z::: "E}DI'EUty SDIe4Del)S)S%S 	 t 	=..//F!% 	 $)--..@P0QRR 9%)""%8"")8i;* *&F&
  	V6TX#5#579J9J	E!	F";'9;;	      di%/V1CfUUUJ 	 	D	F"	E!(	     	2      4 , ;"4 	; 	;I   %O  F
   !*  H 6{{Q&:&:8S[[&Q&Q#!)/22 ; ;D!0066;%22488;NNN&Jy$77 O  <//$O%%229==/ %22488   54T::: 1 #V'1||%+ !	V !	VJ++J77 ''
33Z@2G $00<<Kg,,""w//3'>>C    **3//+2CCCG))HH."!..s33!..z::   10555I V VD!0066V&JsD11V !44w1G1G1M1Mt0TUUU5 g,,""8b-r~h&II	99 V V V%))*<%=%=" %)#'"0	6
  &++!&tD ,    %%    0///    --CVLLMmS)) &Q%%%& & & & & & & & & & & & & & & 	6 	6 	65555MMMMMM	6 		","7"I"I("S"STT  -,,,	
 	
 	
 	
 099 ^	! 68&ZN! ! ! ! /7  *	7F%%g... (((=11D$//b99@@HHHH&&(=33D$//b99@@HHHH"]9--w1G1GGG(=33D$//b99@@HHHH !.i 8 8I+mG44G$//<<CC1i11   
 +-*:*@*@*B*B : :&	; ''(9Y(9(9(9:::$ : :A$++LQLL9999: #ii(899O!;;~66L %<H$E$E!Hk  ! !  3222    8EL3-3333"   P#O8#O#O#OO K) a 
 ! #! ! *6! ! #! ! !  !!" 	@AAATTTTT 	
 	
 	
 	 ))) 	 	 	@AAATTTTT 	
 	
 	
 	 )))	  )I}*4,,F8888i888 B B4AAAAAdAAAAAAA!+I# ! !3B3<8CRC=(( OOO"OO   CRCL	i    #K00LHH9[5F+G+GHHHH2222	2228M8M8M8M  L %%s   $B) )
C3CCY, ,i$7$h A\);\\)\!	!\)$\!	%\)(h )
]3\>9h >]J=h  A$i$r5  nx.DiGraph[str, dict[str, Any]]!list[tuple[str, str, str]] | Nonec                   t          dg          }t          dg fg          }|r|                                \  }}|                     |          D ]}||v r| |         |         }|                    dd          }|t
          j        k    s|t          k    rW|                    dd          }|||f}	||	gz   }
|dk    r|
c S |                    |           |	                    ||
f           |dS )zBFS from source to sink following only infinite-capacity edges.

    Returns a list of (from_node, to_node, reason) tuples representing the path,
    or None if no such path exists.
    r   r1  r   r-  unknownr8  N)
r$   r   popleft
successorsr   r  r  r*  r  r   )r5  r  queuerD   	edge_pathneighbor	edge_datar1  r-  new_edgenew_paths              rK   r  r  &
  s     ($$G <A8R.AQ;R;RE
 3--//i ++D11 	3 	3H7"" x0I }}Z33H48##x7':':"x;; (F3$z1v%%#OOOH%%%h1222  3  4rM   	base_name	extensionc                   |  | }t           j                            |          s|S d}t           j                            |  d| |           r+|dz  }t           j                            |  d| |           +|  d| | S )zGet a unique file path, appending a counter if the file already exists.

    For example, if "min_cut_failed.svg" exists, returns "min_cut_failed_1.svg".
    r&   rC  )ospathexists)r  r  r  counters       rK   r{  r{  G
  s    
 $$$D7>>$ G
'..I<<<<<
=
= 1 '..I<<<<<
=
= ..'.9...rM   tuple[str | None, str | None]c                    ddl }	 ddl}n-# t          $ r  t                              dd           Y dS w xY w|j                            |                                           }|                    |          d         }|	                                D ]}| |
                                         |                                         d         }|                    t          |                     |t          d          k    r|                    d	           |                                                    d
          }t%          dd          }t'          |d          5 }	|	                    |           ddd           n# 1 swxY w Y   ||fS )zVisualize the min-cut graph to an SVG file.

    Returns (path_to_svg, svg_content) tuple. Both are None if pydot is unavailable.
    r   NzMInstall pydot to visualize the min-cut graph for debugging: pip install pydotT)exc_info)NNr1  r  redutf-8min_cut_failed.svgrK  )rt  pydotru  r<   r  nx_pydotto_pydot	to_stringgraph_from_dot_data	get_edges
get_sourceget_destination	set_labelr   r=  	set_color
create_svgdecoder{  r|  r}  )
r5  rd  r  
dot_format	dot_graphedger  r[  r  r  s
             rK   r  r  V
  s       [ 	 	
 	
 	
 zz %%h//99;;J))*55a8I##%% " "$//++,T-A-A-C-CDZPs6{{###U5\\!!NN5!!! &&((//88K   0&99H	h		 	               [  s    &55FFFc                 .   g t           j        t           j        t           j        t           j        t           j        t           j        t           j        t           j        t           j	        t           j
        t           j        t           j        t           j        t           j        t           j        t           j        t           j        t           j        t           j        t           j        t           j        t           j        t           j        t           j        t           j        t           j        t           j        t           j        t           j        t           j        t           j        t           j         t           j!        t           j"        t           j#        t           j$        t           j%        t           j&        t           j'        t           j(        t           j)        t           j*        t           j+        t           j,        t           j-        t           j.        t           j/        t           j0        t           j1        t           j2        t           j3        t           j4        t           j5        t           j6        t           j7        t           j8        t           j9        t           j:        t           j;        t           j<        t           j=        t           j>        t           j?        t           j@        t           jA        t           jB        t           jC        t           jD        t           jE        t           jF        t          jH        t           jI        t           jJ        t           jK        t           jL        } t           jI        t           jJ        t           jM        g}|t           jN        t           jO        t           jP        t          jR        t           jS        t           jT        t           jU        t           jV        t           jW        g	z  }|}| g t          j        t          jX        t           jY        t           jL        t           jZ        t          j[        t          j@        t           j[        t           j\        t          jR        t           jV        t           j]        t           jN        t           jS        t           jO        t           j^        t           j_        t           j`        t           ja        t           jb        t           jc        t           jd        t           je        t           jf        t           jg        t           jh        t           ji        t           jT        t           jj        t           jk        t           jl        t           jm        t           jn        t          jo        t          jp        z  } | t           jq        t           jr        gz  } | |z  } | t                      z  } | t           jt        gz  } | d t          D             z  } t          |           }t          t          dt          f                  t           jy        t           jz        t           j{        g          }t           j|        t           j}        t           j~        t           j        t           j        t           j        t           j        t           j        t           j        t           j        t           j        g}||z  }t          |t          |          |t          |          |          S )Nc                ,    g | ]}t          |          S r\   )r   )rl   ms     rK   r   z'get_default_op_list.<locals>.<listcomp>  s!     N N N1!3A!6!6 N N NrM   .)r   r  r  r  atan2rM  r<  r>  pow	remainderfmod__and____or____xor__
__lshift__
__rshift__eqnegegtleltrF  bitwise_notceilfloorfracnegreluroundsilutruncr<   log10log1plog2lgammaexpexpm1erferfccosacoscoshsinasinsinhtanatantanhatanhsqrtrsqrtrL  sigmoidsoftplus	thresholdthreshold_backwardclampwherelerpaddcmulgelugelu_backwardre  mean_grad_sum_to_sizesum_to_sizerG  totype_asr   r   squeeze	unsqueezersub_to_copyaliasr   slicetrH  broadcast_in_dimexpand
as_stridedpermuteselectrp  rI  clone	full_likevarstd_unsafe_viewr   broadcast_tensorsscalar_tensorones	new_zerosr  arangetriuvar_meanisinfr   fullzerosempty
empty_likeargmaxmaximumiota'_low_memory_max_pool_offsets_to_indicesr  gatherrz  
zeros_liker   r$   r   r	   native_dropout	rand_like
randn_likemmconvolutionconvolution_backwardbmmaddmm#_scaled_dot_product_flash_attention'_scaled_dot_product_efficient_attention_flash_attention_forward_efficient_attention_forwardupsample_bilinear2d
_scaled_mmr>   )default_recomputable_opsrecomputable_view_opsrB   rC   rA   r@   r?   s          rK   rs  rs  }
  ss   L:L:L: 	L: 	
	L:
 	L: 	L: 	L: 	L: 	L: 		L: 	L: 	L: 	L: 	L: 	L:  	!L:" 	#L:$ 	%L:& 	'L:( 	)L:* 	+L:, 	-L:. 	/L:0 		1L:2 	
3L:4 		5L:6 	7L:8 		9L:: 	
;L:< 		=L:> 	
?L:@ 	AL:B 	
CL:D 	
EL:F 		GL:H 	IL:J 	KL:L 	
ML:N 	OL:P 		QL:R 	SL:T 		UL:V 		WL:X 	YL:Z 		[L:\ 		]L:^ 	_L:` 		aL:b 		cL:d 	
eL:f 		gL:h 	
iL:j 	kL:l 	mL:n 	oL:p 	qL:r 	sL:t 	
uL:v 	
wL:x 		yL:z 	{L:| 		}L:~ 	L:@ 	AL:B 		CL:D 	EL:F 	GL:H 		IL:J 	KL:L 	ML:N 	OL:P 	QL:R 	SL:T 		UL:V 	WL:Z "\4>4:F	


 
 %H $!	$!"$! 	
$! 		$!
 	$! 		$! 		$! 	$! 	$! 	$! 	$! 	$! 		$! 	$! 	
$!  	!$!" 	#$!$ 	%$!& 		'$!( 	)$!* 	+$!, 	-$!. 		/$!0 	1$!2 	
3$!4 	5$!6 		7$!8 	9$!: 	
;$!< 	
=$!> 	?$!@ 	A$!B 	C$!D 	
E$!F 	5G$! $L T[ 99(/!   N N N N NN!":;;HS#X./		dndo> J 	!
04%)  #Z/K())8  rM   c                2    i }| j         D ]}|||j        <   |S rH   )r   r   )r   rp  rD   s      rK   r  r  %  s-    ')L ' '"&TYrM   memorylist[float]runtimes
max_memoryall_recomputable_banned_nodes"tuple[float, list[int], list[int]]c                0   t           j        }|dk    rt          |||          S |dk    rt          |||          S |dk    rt	          |||          S |dk    rt          |||          S |dk    rkt                              d           t          j	        | |||          }t	          ||t          |                              t          |	                    S t          |t                    r ||| |||          \  }}	d
||	fS t          d|           )Ngreedyilpdpr)   dynamic_memory_budget_dpzdynamic_memory_budget_dp is an experimental solver. It does not guarantee performance improvements. Additionally, it is not guaranteed to be stable.)r   rN   recorded_knapsack_input_memories recorded_knapsack_input_runtimes)graph_info_provider)knapsack_algomax_mem_budgetr9  z,Not aware of memory budget knapsack solver: )r   activation_memory_budget_solverr*   r+   r(   r)   r<   warningr'   inialize_from_graphr,   get_knee_point_memory_budgetr   r   r   )
r   rJ  rL  rM  rS  rN  SOLVERrW  saved_node_idxrecomp_node_idxs
             rK   #_optimize_runtime_with_given_memoryra  ,  sr    3Fvx<<<	5FHj999	468Z888	3	3	3-fh
KKK	-	-	-?	
 	
 	

 0C#*G-3-5	
 
 
 $7  **)) +  	
 	
 		
 
F0	1	1 T*0&KY8U+
 +
' ^_55R&RRSSSrM   no_dispatchr   r`  c                    t          | j                  }d	fdfd|D             }fd|                                 D             }|                     ||          S )
Ndtorch.SymInt | intrF   r}   c                &    t          |           S )Nr_  )r    )re  r`  s    rK   realize_symbolz8_remove_symbols_without_guarding.<locals>.realize_symbolc  s    X....rM   c                &    g | ]} |          S r\   r\   rl   r  rh  s     rK   r   z4_remove_symbols_without_guarding.<locals>.<listcomp>f  s#    ...1^^A...rM   c                &    g | ]} |          S r\   r\   rj  s     rK   r   z4_remove_symbols_without_guarding.<locals>.<listcomp>g  s#    444AnnQ444rM   )stride)re  rf  rF   r}   )r  shaperl  new_empty_strided)r   r`  rm  rl  rh  s    `  @rK    _remove_symbols_without_guardingro  `  s    MME/ / / / / / /......E4444444FuV444rM   c                   	 t           j        }dd}|dk    rdS |dk    rnt                      5  dd	lm} t          j        | j         j        f          \  	|	                    	 fd
          }|cd d d            S # 1 swxY w Y   d S |dk    rddl
m} t          j        | j         j        f          \  	 |d          5 }  j        i 	 d d d            n# 1 swxY w Y   |                                }t          |d          S t          |t                     r |           S t#          d|           )Nr   r	   rF   c                v   t          | t          j                  rAt          | j        d         t          j                  rt          | j        d         d          S t          | t          j                  rAt          | j        d         t          j                  rt          | j        d         d          S t          | t          j                  r't          | j        d         t          j	                  rdS t          | t          j                  r't          | j        d         t          j
                  rdS | S )Nr   r^  r_        ?T)r   r   r   r   r   rN  ro  r   r    r   r   ra  s    rK   materialize_argz)estimate_runtime.<locals>.materialize_argn  s    a!! 		j&M&M 		3AF5MDQQQQ27## 	
16%=%,(O(O 	QVE]T::::27## 	
16%=%.(Q(Q 	327## 	
16%=%-(P(P 	4HrM   testingr&   profiler   )benchmarkerc                      j          i S rH   )r   )r   r   rD   s   rK   rp   z"estimate_runtime.<locals>.<lambda>  s    ;4;3O3O3O rM   flops)FlopCounterModeF)displayz Not aware of runtime estimator: )r   r	   rF   r	   )r   *activation_memory_budget_runtime_estimatorrc  $torch._inductor.runtime.benchmarkingrv  r   tree_mapr   r   benchmark_gputorch.utils.flop_counterry  r   get_total_flopsr<  r   r   r   )
rD   RUNTIME_MODErs  rv  msry  modecounted_flopsr   r   s
   `       @@rK   estimate_runtimer  k  s   DL
 
 
 
 y  q		"	"]] 	 	HHHHHH!??TY<TUULD&**+O+O+O+O+O+OPPB	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 
	 	 <<<<<<DK8PQQf_U+++ 	)tDK((((	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) 	) ,,..=!$$$	L"8	9	9 N|D!!! LlLLMMMs$   ABB
BC&&C*-C*memory_budgetc                
    !"#$%&'( |dk    s|dk     rt          d|           t          t          j        t          j        t          j        t          j        t          j                  }t          j        rt          |dddd          }|dk    rj
        S t           |          \  }}|dk    r|S dBd j
                  % |          ##%k    r|S dC#%fd dD#%fdt          |ddd          }t           |          \  }} |          |k     r|S t          |d          t                     \  }}	 |          |k     r|S ddlm t          fdj
        D                       "dE"fd}
 |
|	          }d |D             &&fd|D             }t          |t           d          t#                    dk    r
j
        &z   S  fdD             $d D             (ddlm' dF$'(fd$!t          j        r=dG! (fd'} |d(           |d)          g}|d         dd          |d         dd          k    r|d         |d         fg}|r|                                \  }}|d         |d         z
  d*k     r+|                    |           |                    |           Y ||d         |d         z   d+z            }|dd          |dd          k    r|                    ||f           |dd          |dd          k    r|                    ||f           ||                                 dd lm} d, |D             }d- |D             }|                    d./           |                    ||d01           t9          |          D ])\  }}|                    |d2|||         fd3d4d56           *|                    d7           |                    d8           |                     d9           |!                    d           |"                                }|#                                 tI          j%                    }t          j&        "t          j&        }tI          j'        |d:           d;}tP          j)        *                                r?tP          j)        +                                r!d<tP          j)        ,                                 }tH          j-        .                    |d=| d>t_                       d?          }|0                    |           tb          2                    d@|            !| A          d         S )HNr&   r   zJThe valid ranges for memory budget are 0 <= m <= 1. The provided value is )r   r   r   r   r   F)r   r   r   r   r*  r_   rF   r=  c                L    t          t          t          |                     dz  S N    eA)re  maprg  )r*  s    rK   estimate_activations_sizez:choose_saved_values_set.<locals>.estimate_activations_size  s    3x..//#55rM   szc                    | dz  z
  z  S r  r\   )r  max_act_sizemin_act_sizes    rK   get_normalized_sizez4choose_saved_values_set.<locals>.get_normalized_size  s    S\L899rM   activationsc                ,     |           z
  z
  z  S rH   r\   )r  r  r  r  s    rK   get_mem_ratioz.choose_saved_values_set.<locals>.get_mem_ratio  s(    ))+66E<'
 	
rM   )r   r   r   )r   )get_node_storagec              3  .   K   | ]} |          V  d S rH   r\   )rl   rD   r  s     rK   rn   z*choose_saved_values_set.<locals>.<genexpr>  s/      TT4 0 0 6 6TTTTTTrM   r4  ra   c                "    fd| D             S )Nc                |    g | ]8}|j         t          d           k     r |          vst          |          6|9S )r  )r  r}   r   )rl   r6  r  input_storagess     rK   r   zRchoose_saved_values_set.<locals>.get_recomputable_banned_nodes.<locals>.<listcomp>  s`     
 
 
 S))$$Q''~==033 >  >==rM   r\   )r4  r  r  s    rK   get_recomputable_banned_nodesz>choose_saved_values_set.<locals>.get_recomputable_banned_nodes  s3    
 
 
 
 
!
 
 
 	
rM   c                d    g | ]-}|j                             d d          t          j        k    +|.S )r   F)r   r   r%   rH  r  s     rK   r   z+choose_saved_values_set.<locals>.<listcomp>  sA       6::k5))-=-GGG 	
GGGrM   c                    g | ]}|v|	S r\   r\   )rl   r6  must_save_nodess     rK   r   z+choose_saved_values_set.<locals>.<listcomp>  s*     ! ! !0H0H0H0H0HrM   Trj  c                @    g | ]} t          |                    S r\   rg  )rl   r6  r  s     rK   r   z+choose_saved_values_set.<locals>.<listcomp>  s8       -.HQKK((  rM   c                ,    g | ]}t          |          S r\   )r  r   s     rK   r   z+choose_saved_values_set.<locals>.<listcomp>  s.       #'  rM   rb  r  rS  r^   r   r   tuple[list[fx.Node], float]c                                5  t          |t          | d          |          \  }}}d d d            n# 1 swxY w Y   t                      }|D ].}	 |                    |                    # t          $ r Y +w xY w|                              st          d          t          ||
|          \  }}	t          r"t          ||||d D             |	  	         ||fS )Nr   z:dont_ban must be a subset of all_recomputable_banned_nodesc                ,    g | ]}t          |          S r\   r  r  s     rK   r   zNchoose_saved_values_set.<locals>.get_saved_values_knapsack.<locals>.<listcomp>>  s+     ' ' '$%HQKK' ' 'rM   )	r   rN  saved_node_idxsrecomputable_node_idxsexpected_runtimememories_banned_nodes normalized_memories_banned_nodesruntimes_banned_nodesmin_cut_saved_values)
ra  r<  r$   r  BaseExceptionissubsetr   r  r;   r   )r  rS  r   r  r  r  r  r5  r*  rC  aggressive_optionsrN  r  rc  r  s             rK   get_saved_values_knapsackz:choose_saved_values_set.<locals>.get_saved_values_knapsack  s    []] 	 	
 4%%M1%%- 	 &		 	 	 	 	 	 	 	 	 	 	 	 	 	 	 )3) 	 	C:3?@@@@       !>?? 	 L   (	
 
a ! 	4'.K /'=!1' ')F' ' ' 2G&;%1    ---s!   '?AAA99
BBr  tuple[float, float, float]c                b     |           \  }}| t                    |z
   |          fS )N)rS  r   )re  )r  r*  r  r  r  r   rS  r  s      rK   estimate_for_budgetz4choose_saved_values_set.<locals>.estimate_for_budgetI  sU    -F-FYK. . .*L* )**-==l++ rM   r9  rr  gMbP?r(  c                    g | ]
}|d          S )r(  r\   rl   items     rK   r   z+choose_saved_values_set.<locals>.<listcomp>f      000DG000rM   c                    g | ]
}|d          S r&   r\   r  s     rK   r   z+choose_saved_values_set.<locals>.<listcomp>g  r  rM   )
      )figsizeo)markerz.4fzoffset points)r   r  center)
textcoordsxytexthazMemory Budgetz Runtime of Recomputed Componentsz:Pareto Frontier of Memory Budget vs. Recomputation Runtime)exist_okr  _rank_memory_budget_paretorC  r  z%Generated Pareto frontier curve at %s)r  rS  r   )r*  r_   rF   r=  )r  r=  rF   r=  )r  r_   rF   r=  )r4  ra   rF   r_   )r  r=  rS  r^   r   r   rF   r  )r  r=  rF   r  )3r   r   r   ban_recompute_used_far_apart!ban_recompute_long_fusible_chains#ban_recompute_materialized_backwardban_recompute_not_in_allowlistban_recompute_reductionsaggressive_recomputationr   r`   r  torch._inductor.fx_utilsr  r$   rs   rg  r   torch.utils._mode_utilsrc  visualize_memory_budget_paretor  r   sortmatplotlib.pyplotpyplotfigureplotr  annotatexlabelylabeltitlegridgcfshowr  getcwdmemory_budget_pareto_dirmakedirsr   r  r  is_initializedget_rankr  r_  r1   savefigr<   r[  ))r   rS  r  r  runtime_optimized_saved_valuesrC  more_aggressive_optionsmore_aggressive_saved_values%aggressive_recomputation_saved_valuesr4  r  recomputable_banned_nodesr  optionsbisectslhsrhsmidpltx_valuesy_valuesr6  txtfigfig_dirrank_suffixfig_namer  rN  r  r  r  r  r  r  r  r  r  r  rc  r  s)   ``                         @@@@@@@@@@@@@@rK   choose_saved_values_setr    s   
 qMA--hYfhh
 
 	
 $$A#)#K%+%O & E8  O & 
!"'',).$)
 
 
 (5) )%"A --6 6 6 6 -,Y-=>>L,,-KLLL|##--: : : : : : :
 
 
 
 
 
 
 

 &##(%*	   '4Y 7' '# ! }122]BB++  %   ;HY 2; ;7)< }:;;mKK44999999TTTT9CSTTTTTN
 
 
 
 
 
 
  !> =l K K *  O
! ! ! !,! ! ! %+!x% % %! ())Q../11   2O   +H   4333331. 1. 1. 1. 1. 1. 1. 1. 1. 1.f , AG	 	 	 	 	 	 	 	 	 	 '&s++-@-@-E-EF1:abb>WQZ^++
GAJ/0G 
/";;==Sq6CF?T))NN3'''NN3'''))3q6CF?a*?@@qrr7c!""g%%NNC:...qrr7c!""g%%NNC:...  
/ 	''''''0000000000 	

7
###8C000  )) 	 	FAsLLhqk"*      	

?###

5666		NOOOggii


)++*65GK$////))++ 	B0A0P0P0R0R 	BA5#4#=#=#?#?AAK7<<TKTT:L:N:NTTT
 
 	H;XFFF %$#yk  	 	rM   list[torch.fx.Node]c                X   ddl m dd}dfd}t          j                                        rwt          j                                        rXt          j                                        d	k    r5 ||           r) ||           rt                      5               5  d
 |D             g}d t          t          j                                                  D             }t          j        	                    ||d                    t          |           g }i }t          |          D ]t\  }}	fd|	D             }
d}|
D ]B}t          |          }||z  }|t          j                                        k    r
|||j        <   C||d<   |                    |           ut          j        |t          j        j                                                  }t          j                            |t          j        j        j        j                   t-          t          j        |                                                    }d| d| t3          dd fd           fd||         D             }d d d            n# 1 swxY w Y   d d d            n# 1 swxY w Y   |S )Nr   )unset_fake_temporarilyr   r:  rF   r:   c                    | j         D ]7}t          |j        t          j        j                  r|j        j        dv r dS 8dS )N>   c10d_functionalr   TF)r   r   r   r   r   r  r   )r   rD   s     rK   has_collectivesz3_sync_decision_cross_ranks.<locals>.has_collectives  sP    % 	 	DUZ2  +'+RRRtturM   c                0   d                     d | j        D                       }t          j        |                    d                                                    }d t          t          j        	                                          D             t                      5               5  t          j                            |           d d d            n# 1 swxY w Y   d d d            n# 1 swxY w Y   t          fdD                       S )N/c              3  $   K   | ]}|j         V  d S rH   r  rl   r   s     rK   rn   zE_sync_decision_cross_ranks.<locals>.has_same_nodes.<locals>.<genexpr>  s$      >>qAF>>>>>>rM   r  c                    g | ]}d S rH   r\   rl   rC  s     rK   r   zF_sync_decision_cross_ranks.<locals>.has_same_nodes.<locals>.<listcomp>  s    NNNqdNNNrM   c              3  0   K   | ]}d          |k    V  dS r  r\   )rl   r   
all_inputss     rK   rn   zE_sync_decision_cross_ranks.<locals>.has_same_nodes.<locals>.<genexpr>  s,      ::!:a=A%::::::rM   )r_  r   hashlibsha256encode	hexdigestr4  r   r  get_world_sizerc  all_gather_objectr  )r   node_strr`   r  r  s      @rK   has_same_nodesz2_sync_decision_cross_ranks.<locals>.has_same_nodes  s   
 88>>K,=>>>>> 8 899CCEENNE%*;*J*J*L*L$M$MNNN
]] 	D 	D2244 	D 	D//
FCCC	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D 	D ::::z::::::s6   #C2.!CC2C	C2"C	#C22C69C6r&   c                    g | ]	}|j         
S r\   r  r  s     rK   r   z._sync_decision_cross_ranks.<locals>.<listcomp>  s    5551555rM   c                    g | ]}g S r\   r\   r  s     rK   r   z._sync_decision_cross_ranks.<locals>.<listcomp>  s%     : : :: : :rM   c                     g | ]
}|         S r\   r\   )rl   op_namerp  s     rK   r   z._sync_decision_cross_ranks.<locals>.<listcomp>  s    TTT|G4TTTrM   z
total size)r  r!  zpicked_rank_idx=z, saved_nodes of current rank=r  c                     dddS )N)aot_joint_graph_sync_decision_cross_ranksr  r  r\   r\   rM   rK   rp   z,_sync_decision_cross_ranks.<locals>.<lambda>  s    G (% % rM   c                      S rH   r\   )sync_decision_cross_ranks_strs   rK   rp   z,_sync_decision_cross_ranks.<locals>.<lambda>  s    #@ rM   r  c                     g | ]
}|         S r\   r\   )rl   rm   rp  s     rK   r   z._sync_decision_cross_ranks.<locals>.<listcomp>  s*       $%Q  rM   )r   r:  rF   r:   )torch._subclasses.fake_tensorr  r   r  r  r  r  rc  r4  r  r  r  rg  r  r   r   rc  distributed_c10d_get_object_coll_device
all_reduceReduceOpMAXr}   argminr  r   )r   r*  r  r  objectssaved_ops_names_all_rankssaved_sizessaved_ops_with_sizesr5  saved_ops_namessaved_nodes
saved_sizerD   size_of_nodesaved_sizes_tensorpicked_rank_idxrp  r  r  s                   @@@rK   rJ  rJ    s    EDDDDD   ; ; ; ; ; ; 	&&((1,,..1 ,,..22OK(( 3N;'' 3 ]] *	 *	2244 *	 *	555556G: :!%"3"B"B"D"DEE: : :% //0I7ST:VVV+K88L%'K35 (12K(L(L 	/ 	/$_TTTTOTTT
' G GD#+D>>L,.Je/88:::::F,TY75?$\2"":....!&(9QQSS" " " (("u'8'I'R'V )    "%,/A"B"B"G"G"I"IJJO -E  -E  -E  oC  -E  -E)  A@@@      )B?)S  LQ*	 *	 *	 *	 *	 *	 *	 *	 *	 *	 *	 *	 *	 *	 *	 *	 *	 *	 *	 *	 *	 *	 *	 *	 *	 *	 *	 *	 *	 *	X s7   J&GJ<JJ	JJ	JJ#&J#moduler  c                r   d}|rdnd}t          t          | j                            d                              }| j                            dt          j        j        j                  D ]}t          | |j	        d         j
                  }t          |t          j                  rg }|j                            d          D ]}||j        v r| j                            |          5  | j                            | d|           }	|d	z  }|j        d
         |	j        d
<   |	}|                    |	           ddd           n# 1 swxY w Y   |r| j                            |          5  | j                            dt          j        j        j        g |j	        |R i           }
|                    |
d           ddd           n# 1 swxY w Y   |j                            d          }|r"|\  }}g |d |D             R }||f|
j        d<   | j                            |           | S )u  
    Graph-safe RNG lets torch.compile use CUDA Graphs for graphs with RNG ops.
    For graphs without HOPs, the partitioner adds placeholder nodes
    fwd_rng_state_* and bw_rng_state_* to the forward and backward graphs. At
    runtime, the AOTDispatcher retrieves these RNG states and passes them to the
    compiled graphs.

    This works well for no-HOP graphs. With HOPs, the partitioner runs
    recursively: it first partitions the HOP (producing forward/backward HOP
    subgraphs) and then stitches them back into the outer joint graph. For HOPs
    that contain RNG ops, the outer joint graph now includes HOP subgraph
    modules with extra RNG placeholders. We must thread these placeholders
    through the outer module partitioned forward and backward graphs—this
    function does exactly that. It collects the RNG placeholder nodes from the
    HOPs and creates corresponding placeholders in the outer forward and
    backward graphs.

    There is a catch: for a short period, the joint graph is in a “bad” state.
    The HOP subgraphs expect additional inputs (because of the new
    placeholders), but the outer graph call sites don't yet provide them. We
    can't fix this in the joint graph because the joint graph's input signature
    is fixed (primals, tangents). As a compromise, we keep the joint graph in
    somewhat of a bad state for some time and, once the outer forward and
    backward graphs are partitioned, insert the corresponding RNG placeholders
    and wire up the calls.
    r   r  r  r   r!  r   )r   r   rC  r&   r   NT)propagate_metaeager_input_valsc                (    g | ]}|j         d          S )r   )r   )rl   inps     rK   r   z2thread_graphsafe_rng_from_hops.<locals>.<listcomp>-  s    DDDc#(5/DDDrM   )r#  r  r   r"  r   r   r  invoke_subgraphr   r   r   r   r   r  r   rE  r   r   r   r  r  r   r  )r"  r  r  
rng_string
last_inputhop_noder   new_rng_inputsplaceholder_noder  new_hop_node_with_fixed_args
eager_vals
eager_argseager_kwargsnew_eager_argss                  rK   rN  rN    s   < I$/D_Jhv|66-6HHIIJJJL++59#9#I ,   ,2 ,2 68=#3#:;;h// (	2,.N$,N$=$=$=$O$O 9 9 !1!666  55jAA 9 9$*L$<$<)77I77% %	 "Q	0@0Ee0L	u-%.
&--i8889 9 9 9 9 9 9 9 9 9 9 9 9 9 9  2\11(;; 	 	39<3K3K'	.>9(-9.99	4 40 224T 3   	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 &]../ABB
 	/9,J&#&DD^DDD& &N
 '$M056HI ''111Ms&   .AEEE3AGG	G		list[int]c           	        t          | j                  t                      | j        j        D ]n}|j        dk    rd|j        v r                    |           n$t          |          r                    |           |v r                    |j	                   ot          t          t          | j        j                            }t          t          t          | j        j                            }||z   }t          | |          \  }}}	}
                                }                    d |D                        t!          | j        |||	d          }t          fd|j        D                       t          fd| j        j        D                       }t          fdt#          |          D                       }d	}i }| j        j        D ]}|v r
|||<   |d
z  }t%          |||||          S )Nr   tangentsr  c              3  4   K   | ]}||j         dk    |V  d S )Nr   r!  )rl   r  s     rK   rn   z!classify_nodes.<locals>.<genexpr>U  s;        !-ADH4D4D4D4D4D4D rM   r   c              3  H   K   | ]}|j         d k    |j                 V  dS r  r  ro  s     rK   rn   z!classify_nodes.<locals>.<genexpr>[  sC       8 87h 	TY8 8rM   c              3  ,   K   | ]}|v|v
|V  d S rH   r\   )rl   rD   rc   ru   s     rK   rn   z!classify_nodes.<locals>.<genexpr>`  sF       6 6(((T9J-J-J 	-J-J-J-J6 6rM   c              3  *   K   | ]\  }}|v 	|V  d S rH   r\   )rl   r6  pr  s      rK   rn   z!classify_nodes.<locals>.<genexpr>e  s;       - -a!7T2T2T2T2T2T2T- -rM   r   r&   )r  r   r$   r   r   r   r  r   r  r  r  r  r4   r3   r)  r  r   r  r^   )r  r  r  rD   r  r  r`   r%  r&  r'  r(  rd   forward_only_graphre   rh   fw_cntrg   rp  rc   ru   s    `               @@@rK   r@  r@  8  s   
 $L$677L-7\\"( 1 17m##
dk(A(A!!$''''!$'' 	(!!$'''$$$$$TZ000
L,>,DEEFFM!&)<l>P>V"W"WXX33F OOO CK/1B )--//        <FK1BI  .8 8 8 8 8&,8 8 8 . .
 ,6 6 6 6 6 6 &,6 6 6 , ,O
 #- - - - -..- - - # # FH"(  $$$#HTNaKF#  rM   r  )r  compilerc               	   | j                                          |                                  | j         }t          j        rt          |          }|| _         | j         }t          |           }t          |           }	|rt          | d          } t          j	        st          |            t          |            t          |            |g }t          | ||          }
t          |
j                  dk    rt!          | ||||
j                  S t%          | j         j                  D ]}|j        dk    rt+          d          |_        "|
                    |          sd|_        ?t+          d          |_        |j        D ]$}t3          |j        |j        dz             |_        %t          j        }|j        D ]?}t7          |j                            d	d          t<                    r|j        d	         } n@t?          ||
|
          }t          j         rtA          ||          }tC          tE          tF          |                    }tC          tE          d |                    }tI          | ||||
j                  \  }}|r$|	r"tK          | ||t          |                    \  }}tM          |          }t          j'        rddl(m'}  |||||
j                   tS          |          }tS          |          }tU          |d          }tU          |d          }tV          rtY          d |D                       }t[          d |D                       dz  }t\          /                    d|           t\          /                    d|           ta          d |j         j        D                       }ta          d |j         j        D                       }||z  }tc          t*                    }|j         j        D ]G}|j2        |v r<tg          |j4        d          r'|tk          |j4        j6                  xx         dz  cc<   Ht\          /                    dt          |          t          |          t          |                     tY          |7                                tq          j9        d          d          }t\          /                    d|           ||fS )ax  
    Partitions the joint graph such that the backward recomputes the forward.
    Recomputing helps in trading off memory bandwidth with computation.

    To create the fwd and bwd graph, we copy the joint graph, manually set the
    outputs to just original forward or backward outputs. And then we run the
    resulting graphs through dead code elimination.

    .. warning::
        This API is experimental and likely to change.

    Args:
        joint_module(fx.GraphModule): The joint forward and backward graph. This
            is the result of AOT Autograd tracing.
        _joint_inputs: The inputs to the joint graph. This is unused.
        compiler: This option determines the default set of recomputable ops.
            Currently, there are two options: ``nvfuser`` and ``inductor``.
        recomputable_ops: This is an optional set of recomputable ops. If this
            is not None, then this set of ops will be used instead of the
            default set of ops.
        num_fwd_outputs: The number of outputs from the forward graph.

    Returns:
        Returns the generated forward and backward Fx graph modules.
    Fr  Nr   )r  r  rh   r   r  r&   r  )r  c                "    t          |            S rH   r2  )rm   s    rK   rp   z5min_cut_rematerialization_partition.<locals>.<lambda>  s    [^^); rM   r3  r5  r7  Tc                J    g | ] }t          |          t          |          f!S r\   )rg  r   r  s     rK   r   z7min_cut_rematerialization_partition.<locals>.<listcomp>  s)    KKKSVV4KKKrM   c              3  4   K   | ]}t          |          V  d S rH   r  r  s     rK   rn   z6min_cut_rematerialization_partition.<locals>.<genexpr>  s(      'J'J'J'J'J'J'J'JrM   z'Theoretical Activations Stored: %.2f GBz,Theoretical Per Activation Storage Sizes: %sc              3  :   K   | ]}|j         d k    |j        V  dS r   Nr  r   s     rK   rn   z6min_cut_rematerialization_partition.<locals>.<genexpr>  9       %
 %
47o;U;UDI;U;U;U;U%
 %
rM   c              3  :   K   | ]}|j         d k    |j        V  dS rC  r  r   s     rK   rn   z6min_cut_rematerialization_partition.<locals>.<genexpr>  rD  rM   r  z# remat/fw/bw: %d/%d/%drj  zCount of Ops Rematerialized: %s):r   r   r  r   cser7   r   r   r;  r<  r=  r>  r?  r@  r   rc   rW  rh   r  r   r   r}   r  rx   r  r>  activation_memory_budgetr   r   r   r=  r  rJ  r  r  r   r  rK  rL  r6  rM  r9   rN  r;   rs   re  r<   r  r$   r   r   r   r   r   r  rf  r   rm  )r  r  r=  r  r  r   	cse_graphr   rQ  rR  rS  rD   r  r  r*  r  rU  rV  r6  sorted_sizestotal_activations_size_gbfw_module_nodesbw_module_nodesremat_nodescountsrematerialized_opss                             rK   r:  r:  y  sx   D **,,,D z ' &&	&$K!5l!C!C%=l%K%K"! X-lQVWWW: -|,,,\***|,,,$,(*%3_ I 9&''1,, +*G(1(M
 
 
 	
 +122 R R7h #CD))$// 	R !D #CD
 R R$'(94;Lq;P$Q$Q!!R 3M!  dimmOT::EBB 	 Io6ME	 +#  L ( M1+|LL6+|<<==O;;\JJKKL 4''$-$I  Iy " ) 	#8iC4H4H$ $ Iy 4I>>I * 

	
 	
 	
 	
 	
 	
 	%$1		
 	
 	
 y))Iy))I.yeLLLI.ydKKKI HKKlKKKLL %('J'J\'J'J'J$J$JS$P!:<UVVV 	?NNN$ %
 %
"+/"7%
 %
 %
 
 
 % %
 %
"+/"7%
 %
 %
 
 
 &7!,S!1!1O) 	> 	>DyK''GDKAR,S,S's4;677888A=888%    		
 	
 	
 $LLNN 3A 6 6
 
 
 	24FGGGirM   fx_graphTtracedfnamefigname
clear_metaprogstr | list[str] | Noneparse_stack_tracedot_graph_shapec                   |rDt          j        | j                  }t          j        | |          } | j        j        D ]	}i |_        
t          j        	                    |          \  }	}
|
sdt          j        z   }
t                              d|	|
           t          j        | |||          }|                                }t#          |d|
                    d          z             }|	 |
 }| ||           d S  |||           d S )Nro  zWriting FX graph to file: %s%s)rW  rX  write_)rU  )r  deepcopyr   r   r  r   r   r  r  splitextr   torch_compile_graph_formatr<   r  r#   FxGraphDrawerget_main_dot_graphr   lstrip)rQ  rR  rS  rT  rU  rW  rX  r   rD   r  extgr   write_methods                 rK   
draw_graphrd  %  s'     M&,//		22L& 	 	DDII  ''ID# 6F55HH-tS999"+'		 	 	A 	
A1hC899LNSNNE|UU&&&&&&rM   rV   )r   r   rF   r:   )rD   rE   rF   r}   )r   rE   rF   r   )rD   rE   rF   r   )rD   rE   r   r   rF   r   )NF)r   r   r`   r_   r   r_   r   r   r   r   r   r:   rF   r   )r  r   r  r}   rF   r  )r*  r_   r   r   rF   r+  )r0  r1  rF   r}   )r8  r9  r   )r   r:  rD   r;  r<  r=  r>  r=  r?  r}   rF   r;  )r   r:  rD   r;  rV  r;  rX  rY  rK  r=  rZ  r=  r?  r}   rF   r;  )rc  rd  rF   r=  )rF   rj  )rD   r;  rF   r:   )rF   rY  )rq  rY  rF   r  )r   r:  rF   r+  )r  r   r  r   r  r  rF   r+  rH   )
r*  r_   r  r   r  r   rh   r  rF   r+  )r  r   r*  r_   r  r_   r  r}   rh   r  rF   r  )r  r   r  r	   r  r}   r  r  rh   r  rF   r  )rf  r}   rq  rY  rF   r}   )r   r   rF   r+  )rF   rp  )r   r{  r|  rf   rF   r}  )r  r   rF   r   )rU  r  rV  r  r  r;  r  r;  r  r  r  r}   r  r;  r  r;  rF   r  )
r  r   rU  r   rV  r   r  r}   rF   r  )r  r   rF   r+  )r  r   r   r:   rF   r   )
r   r   rS  r^   r  r   r  r  rF   r  )r5  r  rF   r  )r  r   r  r   rF   r   )r5  r  rF   r  )rF   r>   )r   r   rF   r  )r   r   rJ  rK  rL  rK  rM  r=  rS  r^   rN  r_   rF   rO  )r   rd  r`  r}   rF   rd  )rD   rE   rF   r=  r  )r   r   rS  r^   r  r=  rF   r_   )r   r:  r*  r  rF   r  )r"  r   r  r:   rF   r   )r  r   r  r3  r  r}   rF   r^   )r  )r  r   r  r	   r=  r   r  r}   r  r  rF   r  )rP  TNFN)rQ  r  rR  r   rS  r   rT  r:   rU  rV  rW  r:   rX  r   rF   r+  )
__future__r   r  r   r   r=  r  loggingr  r   r  os.pathr  r8  rl  r   r   collections.abcr   dataclassesr   r   typingr	   r
   r   torch._inductor.inductor_primstorch.distributedtorch.fxr   torch.utils._pytreeutils_pytreer   torch._dynamo.utilsr   r   ;torch._functorch._activation_checkpointing.ac_logging_utilsr   $torch._functorch._aot_autograd.utilsr   torch._inductorr   r  !torch._inductor.custom_graph_passr   r   "torch._library.fake_class_registryr   torch._library.utilsr   torch._loggingr   r   torch._logging._internalr   r  r   %torch.fx.experimental._backward_stater   "torch.fx.experimental.proxy_tensorr   r   torch.fx.experimental.sym_noder   r   %torch.fx.experimental.symbolic_shapesr   r   r   r    r!   r"   torch.fx.passesr#   torch.utils._ordered_setr$   torch.utils.checkpointr%   r  -_activation_checkpointing.graph_info_providerr'   "_activation_checkpointing.knapsackr(   r)   r*   r+   ,_activation_checkpointing.knapsack_evaluatorr,   _aot_autograd.descriptorsr-   r.   r/   _aot_autograd.functional_utilsr0   _aot_autograd.logging_utilsr1   _aot_autograd.utilsr2   r3   r4   r5   r6   compile_utilsr7   r8   r9   rt  rd  sympydebug_partitionerr;   r[   	getLoggerrW   r<   r   r   rH  r>   r^   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r
  r  r  r  r   r   r)  r/  r7  rW  rb  ri  ru  r|  r~  r  r  r  r  r  r  rW  r}   r*  rZ  rg  ro  cacherz  r  rL  r  rK  r=  r>  r?  r  r;  r  r  r{  r  rs  r  ra  r  rc  ro  r  r  rJ  rN  r@  r:  rd  r\   rM   rK   <module>r     s   " " " " " " "               				  				  * * * * * * * * $ $ $ $ $ $ * * * * * * * * % % % % % % % %  % % % %           $ $ $ $ $ $ $ $ $ < < < < < < < <      A @ @ @ @ @ 5 5 5 5 5 5        @ ? ? ? ? ? + + + + + + 7 7 7 7 7 7 7 7 . . . . . . A A A A A A ? ? ? ? ? ? H H H H H H H H L L L L L L L L                ) ( ( ( ( ( / / / / / / 3 3 3 3 3 3       L L L L L L            L K K K K K         
 A @ @ @ @ @ ; ; ; ; ; ;              I H H H H H H H H H  LLL %6  6 6 6 6'g'11 1 1 1 1y~	 > > > > > > > >2                B                           I I I I o   2- - - -   H  $)l l l l l^   X X X XC C C CB B B BJ J J JK K K K   5 5 5 5J J J J$       K K K K K\C! C! C! C!L9 9 9 9"   G G G G45 5 5 5	 	 	 	>G >G >G >GBMG MG MG MG`W W W W| ?C	,W ,W ,W ,W ,Wj ?C`" `" `" `" `" `"P 7;>BV  V  V  V  V  V r #c((" " " "   :R R R R    "P P P PK K K K\[* [* [* [*|a  a  a  a H@ @ @ @' ' ' '<   .K K K K5 5 5 5x ,0	s	& s	& s	& s	& s	&l   B/ / / /$! $! $! $!Ne e e eP   -T -T -T -T` 0 / / / / /5 5 5 5*N *N *N *N` z	 z	 z	 z	 z	zN N N NbO O O Od> > > >H i  7;i  i  i  i  i  i ^ #'#"&' ' ' ' ' ' 'rM   